blob: 23f51db5c8057c95e4376e156f6b377a93ead284 [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2010, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.dev.test.bidi;
import java.io.BufferedReader;
import java.io.IOException;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.TestUtil;
import com.ibm.icu.lang.UCharacterDirection;
import com.ibm.icu.text.Bidi;
import com.ibm.icu.text.BidiClassifier;
/**
* @author Markus W. Scherer
* BiDi conformance test, using the Unicode BidiTest.txt file.
* Ported from ICU4C intltest/bidiconf.cpp .
*/
public class BiDiConformanceTest extends TestFmwk {
public static void main(String[] args) throws Exception {
new BiDiConformanceTest().run(args);
}
public BiDiConformanceTest() {}
public void TestBidiTest() throws IOException {
BufferedReader bidiTestFile=TestUtil.getDataReader("unicode/BidiTest.txt");
Bidi ubidi=new Bidi();
ubidi.setCustomClassifier(new ConfTestBidiClassifier());
lineNumber=0;
levelsCount=0;
orderingCount=0;
errorCount=0;
outerLoop:
while(errorCount<10 && (line=bidiTestFile.readLine())!=null) {
++lineNumber;
lineIndex=0;
// Remove trailing comments and whitespace.
int commentStart=line.indexOf('#');
if(commentStart>=0) {
line=line.substring(0, commentStart);
}
if(!skipWhitespace()) {
continue; // Skip empty and comment-only lines.
}
if(line.charAt(lineIndex)=='@') {
++lineIndex;
if(line.startsWith("Levels:", lineIndex)) {
lineIndex+=7;
parseLevels();
} else if(line.startsWith("Reorder:", lineIndex)) {
lineIndex+=8;
parseOrdering();
}
// Skip unknown @Xyz: ...
} else {
parseInputStringFromBiDiClasses();
if(!skipWhitespace() || line.charAt(lineIndex++)!=';') {
errln("missing ; separator on input line "+line);
return;
}
int bitset=Integer.parseInt(line.substring(lineIndex).trim(), 16);
// Loop over the bitset.
for(int i=0; i<=3; ++i) {
if((bitset&(1<<i))!=0) {
ubidi.setPara(inputString, paraLevels[i], null);
byte actualLevels[]=ubidi.getLevels();
if(!checkLevels(actualLevels, paraLevelNames[i])) {
continue outerLoop;
}
if(!checkOrdering(ubidi, paraLevelNames[i])) {
continue outerLoop;
}
}
}
}
}
}
private static final byte paraLevels[]={
Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT,
0,
1,
Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT
};
private static final String paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
private void parseLevels() {
directionBits=0;
levelsCount=0;
if(skipWhitespace()) {
String[] levelStrings=line.substring(lineIndex).split("[ \t]+");
for(String levelString: levelStrings) {
if(levelString.equals("x")) {
levels[levelsCount++]=Bidi.LEVEL_DEFAULT_LTR;
} else {
int value=Integer.parseInt(levelString);
if(value<0 || value>(Bidi.MAX_EXPLICIT_LEVEL+1)) {
throw new IllegalArgumentException(
"@Levels: parse error at "+levelString+" in "+line);
}
levels[levelsCount++]=(byte)value;
directionBits|=(1<<(value&1));
}
}
}
}
private void parseOrdering() {
orderingCount=0;
if(skipWhitespace()) {
String[] orderingStrings=line.substring(lineIndex).split("[ \t]+");
for(String orderingString: orderingStrings) {
int value=Integer.parseInt(orderingString);
if(value>=1000) {
throw new IllegalArgumentException(
"@Reorder: parse error at "+orderingString+" in "+line);
}
ordering[orderingCount++]=value;
}
}
}
private static char charFromBiDiClass[]={
0x6c, // 'l' for L
0x52, // 'R' for R
0x33, // '3' for EN
0x2d, // '-' for ES
0x25, // '%' for ET
0x39, // '9' for AN
0x2c, // ',' for CS
0x2f, // '/' for B
0x5f, // '_' for S
0x20, // ' ' for WS
0x3d, // '=' for ON
0x65, // 'e' for LRE
0x6f, // 'o' for LRO
0x41, // 'A' for AL
0x45, // 'E' for RLE
0x4f, // 'O' for RLO
0x2a, // '*' for PDF
0x60, // '`' for NSM
0x7c // '|' for BN
};
private class ConfTestBidiClassifier extends BidiClassifier {
public ConfTestBidiClassifier() {
super(null);
}
@Override
public int classify(int c) {
for(int i=0; i<charFromBiDiClass.length; ++i) {
if(c==charFromBiDiClass[i]) {
return i;
}
}
// Character not in our hardcoded table.
// Should not occur during testing.
return Bidi.CLASS_DEFAULT;
}
}
private static final int biDiClassNameLengths[]={
1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
};
private void parseInputStringFromBiDiClasses() {
inputStringBuilder.delete(0, 0x7fffffff);
/*
* Lengthy but fast BiDi class parser.
* A simple parser could terminate or extract the name string and use
* int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
* but that makes this test take significantly more time.
*/
char c0, c1, c2;
while(skipWhitespace() && (c0=line.charAt(lineIndex))!=';') {
int biDiClass=UCharacterDirection.CHAR_DIRECTION_COUNT;
// Compare each character once until we have a match on
// a complete, short BiDi class name.
if(c0=='L') {
if((lineIndex+2)<line.length() && line.charAt(lineIndex+1)=='R') {
if((c2=line.charAt(lineIndex+2))=='E') {
biDiClass=UCharacterDirection.LEFT_TO_RIGHT_EMBEDDING;
} else if(c2=='O') {
biDiClass=UCharacterDirection.LEFT_TO_RIGHT_OVERRIDE;
}
} else {
biDiClass=UCharacterDirection.LEFT_TO_RIGHT;
}
} else if(c0=='R') {
if((lineIndex+2)<line.length() && line.charAt(lineIndex+1)=='L') {
if((c2=line.charAt(lineIndex+2))=='E') {
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_EMBEDDING;
} else if(c2=='O') {
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_OVERRIDE;
}
} else {
biDiClass=UCharacterDirection.RIGHT_TO_LEFT;
}
} else if(c0=='E') {
if((lineIndex+1)>=line.length()) {
// too short
} else if((c1=line.charAt(lineIndex+1))=='N') {
biDiClass=UCharacterDirection.EUROPEAN_NUMBER;
} else if(c1=='S') {
biDiClass=UCharacterDirection.EUROPEAN_NUMBER_SEPARATOR;
} else if(c1=='T') {
biDiClass=UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR;
}
} else if(c0=='A') {
if((lineIndex+1)>=line.length()) {
// too short
} else if((c1=line.charAt(lineIndex+1))=='L') {
biDiClass=UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
} else if(c1=='N') {
biDiClass=UCharacterDirection.ARABIC_NUMBER;
}
} else if(c0=='C' && (lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='S') {
biDiClass=UCharacterDirection.COMMON_NUMBER_SEPARATOR;
} else if(c0=='B') {
if((lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='N') {
biDiClass=UCharacterDirection.BOUNDARY_NEUTRAL;
} else {
biDiClass=UCharacterDirection.BLOCK_SEPARATOR;
}
} else if(c0=='S') {
biDiClass=UCharacterDirection.SEGMENT_SEPARATOR;
} else if(c0=='W' && (lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='S') {
biDiClass=UCharacterDirection.WHITE_SPACE_NEUTRAL;
} else if(c0=='O' && (lineIndex+1)<line.length() && line.charAt(lineIndex+1)=='N') {
biDiClass=UCharacterDirection.OTHER_NEUTRAL;
} else if(c0=='P' && (lineIndex+2)<line.length() &&
line.charAt(lineIndex+1)=='D' && line.charAt(lineIndex+2)=='F') {
biDiClass=UCharacterDirection.POP_DIRECTIONAL_FORMAT;
} else if(c0=='N' && (lineIndex+2)<line.length() &&
line.charAt(lineIndex+1)=='S' && line.charAt(lineIndex+2)=='M') {
biDiClass=UCharacterDirection.DIR_NON_SPACING_MARK;
}
// Now we verify that the class name is terminated properly,
// and not just the start of a longer word.
int biDiClassNameLength=biDiClassNameLengths[biDiClass];
char c;
if( biDiClass==UCharacterDirection.CHAR_DIRECTION_COUNT ||
((lineIndex+biDiClassNameLength)<line.length() &&
!isInvWhitespace(c=line.charAt(lineIndex+biDiClassNameLength)) &&
c!=';')
) {
throw new IllegalArgumentException(
"BiDi class string not recognized at "+line.substring(lineIndex)+" in "+line);
}
inputStringBuilder.append(charFromBiDiClass[biDiClass]);
lineIndex+=biDiClassNameLength;
}
inputString=inputStringBuilder.toString();
}
private static char printLevel(byte level) {
if(level<Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT) {
return (char)('0'+level);
} else {
return 'x';
}
}
private static int getDirectionBits(byte actualLevels[]) {
int actualDirectionBits=0;
for(int i=0; i<actualLevels.length; ++i) {
actualDirectionBits|=(1<<(actualLevels[i]&1));
}
return actualDirectionBits;
}
private boolean checkLevels(byte actualLevels[], String paraLevelName) {
boolean isOk=true;
if(levelsCount!=actualLevels.length) {
errln("Wrong number of level values; expected "+levelsCount+" actual "+actualLevels.length);
isOk=false;
} else {
for(int i=0; i<actualLevels.length; ++i) {
if(levels[i]!=actualLevels[i] && levels[i]<Bidi.LEVEL_DEFAULT_LTR) {
if(directionBits!=3 && directionBits==getDirectionBits(actualLevels)) {
// ICU used a shortcut:
// Since the text is unidirectional, it did not store the resolved
// levels but just returns all levels as the paragraph level 0 or 1.
// The reordering result is the same, so this is fine.
break;
} else {
errln("Wrong level value at index "+i+"; expected levels[i] actual "+actualLevels[i]);
isOk=false;
break;
}
}
}
}
if(!isOk) {
printErrorLine(paraLevelName);
StringBuilder els=new StringBuilder("Expected levels: ");
int i;
for(i=0; i<levelsCount; ++i) {
els.append(' ').append(printLevel(levels[i]));
}
StringBuilder als=new StringBuilder("Actual levels: ");
for(i=0; i<actualLevels.length; ++i) {
als.append(' ').append(printLevel(actualLevels[i]));
}
errln(els.toString());
errln(als.toString());
}
return isOk;
}
// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
// does not work for custom BiDi class assignments
// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
// Therefore we just skip the indexes for BiDi controls while comparing
// with the expected ordering that has them omitted.
private boolean checkOrdering(Bidi ubidi, String paraLevelName) {
boolean isOk=true;
int resultLength=ubidi.getResultLength(); // visual length including BiDi controls
int i, visualIndex;
// Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
// and loop over each run's indexes, but that seems unnecessary for this test code.
for(i=visualIndex=0; i<resultLength; ++i) {
int logicalIndex=ubidi.getLogicalIndex(i);
if(levels[logicalIndex]>=Bidi.LEVEL_DEFAULT_LTR) {
continue; // BiDi control, omitted from expected ordering.
}
if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
errln("Wrong ordering value at visual index "+visualIndex+"; expected "+
ordering[visualIndex]+" actual "+logicalIndex);
isOk=false;
break;
}
++visualIndex;
}
// visualIndex is now the visual length minus the BiDi controls,
// which should match the length of the BidiTest.txt ordering.
if(isOk && orderingCount!=visualIndex) {
errln("Wrong number of ordering values; expected "+orderingCount+" actual "+visualIndex);
isOk=false;
}
if(!isOk) {
printErrorLine(paraLevelName);
StringBuilder eord=new StringBuilder("Expected ordering: ");
for(i=0; i<orderingCount; ++i) {
eord.append(' ').append((char)('0'+ordering[i]));
}
StringBuilder aord=new StringBuilder("Actual ordering: ");
for(i=0; i<resultLength; ++i) {
int logicalIndex=ubidi.getLogicalIndex(i);
if(levels[logicalIndex]<Bidi.LEVEL_DEFAULT_LTR) {
aord.append(' ').append((char)('0'+logicalIndex));
}
}
errln(eord.toString());
errln(aord.toString());
}
return isOk;
}
private void printErrorLine(String paraLevelName) {
++errorCount;
errln(String.format("Input line %5d: %s", lineNumber, line));
errln("Input string: "+inputString);
errln("Para level: "+paraLevelName);
}
private static boolean isInvWhitespace(char c) {
return ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n');
}
/**
* Skip isInvWhitespace() characters.
* @return true if line.charAt[lineIndex] is a non-whitespace, false if lineIndex>=line.length()
*/
private boolean skipWhitespace() {
while(lineIndex<line.length()) {
if(!isInvWhitespace(line.charAt(lineIndex))) {
return true;
}
++lineIndex;
}
return false;
}
private String line;
private int lineIndex;
private byte levels[]=new byte[1000]; // UBiDiLevel
private int directionBits;
private int ordering[]=new int[1000];
private int lineNumber;
private int levelsCount;
private int orderingCount;
private int errorCount;
private String inputString;
private StringBuilder inputStringBuilder=new StringBuilder();
}