// blob: fb3ffdf57d3a4a4981c092f212c91e115a7af573 [file] [log] [blame]
package com.ibm.test.translit;
import com.ibm.test.*;
import com.ibm.text.*;
import com.ibm.util.Utility;
import java.io.*;
import java.text.ParseException;
/**
* @test
* @summary Round trip test of Transliterator
*/
public class RoundTripTest extends TestFmwk {
/**
 * Command-line entry point: builds the suite and passes the arguments
 * on to its {@code run} method.
 */
public static void main(String[] args) throws Exception {
    RoundTripTest suite = new RoundTripTest();
    suite.run(args);
}
/**
 * Round-trip test of the "Latin-Kana" transliterator between the Latin and
 * Hiragana scripts, with source filter [a-z] and target filter
 * U+3040..U+3094.
 */
public void TestHiragana() throws IOException, ParseException {
    Test hiragana = new Test("Latin-Kana",
            TestUtility.LATIN_SCRIPT, TestUtility.HIRAGANA_SCRIPT);
    hiragana.test("[a-z]", "[\u3040-\u3094]", this);
}
/**
 * Round-trip test of the "Latin-Kana" transliterator between the Latin and
 * Katakana scripts, with source filter [A-Z] and target filter
 * U+30A1..U+30FA.
 */
public void TestKatakana() throws IOException, ParseException {
    Test katakana = new Test("Latin-Kana",
            TestUtility.LATIN_SCRIPT, TestUtility.KATAKANA_SCRIPT);
    katakana.test("[A-Z]", "[\u30A1-\u30FA]", this);
}
/**
 * Round-trip test of the "Latin-Arabic" transliterator, with source filter
 * [a-z] and target filter U+0620..U+065F minus U+0640.
 */
public void TestArabic() throws IOException, ParseException {
    Test arabic = new Test("Latin-Arabic",
            TestUtility.LATIN_SCRIPT, TestUtility.ARABIC_SCRIPT);
    arabic.test("[a-z]", "[\u0620-\u065F-[\u0640]]", this);
}
/**
 * Round-trip test of the "Latin-Hebrew" transliterator, using the default
 * source filter and target filter U+05D0..U+05EF.
 */
public void TestHebrew() throws IOException, ParseException {
    Test hebrew = new Test("Latin-Hebrew",
            TestUtility.LATIN_SCRIPT, TestUtility.HEBREW_SCRIPT);
    hebrew.test(null, "[\u05D0-\u05EF]", this);
}
/**
 * Round-trip test of the Jamo-Hangul transliterator, using the specialised
 * {@code TestHangul} test class with default source and target filters.
 */
public void TestHangul() throws IOException, ParseException {
    Test hangul = new TestHangul();
    // The full pair phase takes too long, so only the first 30 leading
    // target characters are paired.
    hangul.setPairLimit(30);
    hangul.test(null, null, this);
}
/**
 * Round-trip test of the "Latin-Jamo" transliterator, with source filter
 * [a-z] and no explicit target filter.
 */
public void TestJamo() throws IOException, ParseException {
    Test jamo = new Test("Latin-Jamo",
            TestUtility.LATIN_SCRIPT, TestUtility.JAMO_SCRIPT);
    // The full run takes too long, so give up once 200 failures are recorded.
    jamo.setErrorLimit(200);
    //jamo.test("[[a-z]-[fqvxz]]", null, this);
    jamo.test("[a-z]", null, this);
}
/**
 * Round-trip test of the "Latin-Hangul" transliterator, with source filter
 * [a-z] and no explicit target filter.
 */
public void TestJamoHangul() throws IOException, ParseException {
    Test latinHangul = new Test("Latin-Hangul",
            TestUtility.LATIN_SCRIPT, TestUtility.HANGUL_SCRIPT);
    // The full run takes too long, so give up once 50 failures are recorded.
    latinHangul.setErrorLimit(50);
    latinHangul.test("[a-z]", null, this);
}
/**
 * Round-trip test of the "Latin-Greek" transliterator, using the default
 * source filter and target filter U+0380..U+03CF.
 */
public void TestGreek() throws IOException, ParseException {
    Test greek = new Test("Latin-Greek",
            TestUtility.LATIN_SCRIPT, TestUtility.GREEK_SCRIPT);
    greek.test(null, "[\u0380-\u03CF]", this);
}
/**
 * Round-trip test of the "Latin-Cyrillic" transliterator, using the default
 * source filter and target filter U+0401, U+0410..U+044F and U+0451.
 */
public void TestCyrillic() throws IOException, ParseException {
    Test cyrillic = new Test("Latin-Cyrillic",
            TestUtility.LATIN_SCRIPT, TestUtility.CYRILLIC_SCRIPT);
    cyrillic.test(null, "[\u0401\u0410-\u044F\u0451]", this);
}
static class Test {
// Writer for the HTML failure report; opened and closed by test().
PrintWriter out;
// ID handed to Transliterator.getInstance(); also part of the report file name.
private String transliteratorID;
// Script code a character must have to be treated as a source character.
private byte sourceScript;
// Script code a character must have to be treated as a target character.
private byte targetScript;
// Once errorCount reaches this value the logging methods throw TestTruncated.
private int errorLimit = Integer.MAX_VALUE;
// Number of failures written to the report so far.
private int errorCount = 0;
// Maximum number of leading characters tried in the target pair phase of test2().
private int pairLimit = 0x10000;
// Extra filter applied by isSource(); test() assigns a default when none is given.
UnicodeSet sourceRange;
// Optional extra filter applied by isTarget(); null means no extra restriction.
UnicodeSet targetRange;
// Destination for progress and summary messages; set by test().
TestLog log;
/**
 * Creates a round-trip test for one script transliterator.
 *
 * @param transliteratorID ID later passed to {@code Transliterator.getInstance}
 * @param sourceScript     script code identifying source characters
 * @param targetScript     script code identifying target characters
 */
Test(String transliteratorID,
        byte sourceScript, byte targetScript) {
    this.targetScript = targetScript;
    this.sourceScript = sourceScript;
    this.transliteratorID = transliteratorID;
}
/**
 * Sets the number of recorded failures at which the run is cut short.
 * The failure-logging methods throw {@code TestTruncated} once the
 * failure count reaches this value.
 *
 * @param limit the new failure limit
 */
public void setErrorLimit(int limit) {
    this.errorLimit = limit;
}
/**
 * Sets how many leading target characters the pair phase of
 * {@code test2()} may process before it throws {@code TestTruncated}.
 *
 * @param limit the new limit on leading characters
 */
public void setPairLimit(int limit) {
    this.pairLimit = limit;
}
/**
 * Runs the round-trip checks and writes every failure to an HTML report.
 *
 * <p>The report is a UTF-8 file named
 * {@code test_<id>_<sourceScript>_<targetScript>.html} in the current
 * directory. It is deleted again when no failure was recorded; otherwise
 * an error naming the file is sent to {@code log}.
 *
 * @param sourceRange UnicodeSet pattern restricting source characters;
 *                    null or empty keeps the current filter, which
 *                    defaults to {@code [a-zA-Z]}
 * @param targetRange UnicodeSet pattern restricting target characters;
 *                    null or empty keeps the current filter (possibly none)
 * @param log         receiver of progress and summary messages
 * @throws java.io.IOException if the report file cannot be opened
 * @throws java.text.ParseException declared for the UnicodeSet pattern
 *         parsing done here
 */
public void test(String sourceRange, String targetRange, TestLog log)
        throws java.io.IOException, java.text.ParseException {
    if (sourceRange != null && sourceRange.length() > 0) {
        this.sourceRange = new UnicodeSet(sourceRange);
    }
    if (targetRange != null && targetRange.length() > 0) {
        this.targetRange = new UnicodeSet(targetRange);
    }
    // NOTE(review): the default is Latin letters whatever sourceScript is.
    // With a non-Latin source script (e.g. the Jamo-Hangul test) isSource()
    // presumably rejects every character -- confirm this is intended.
    if (this.sourceRange == null) {
        this.sourceRange = new UnicodeSet("[a-zA-Z]");
    }
    this.log = log;
    log.logln(Utility.escape("Source: " + this.sourceRange));
    log.logln(Utility.escape("Target: " + this.targetRange));

    // Make a UTF-8 output file we can read with a browser.
    // TODO: check that every transliterator transliterates the null string correctly!
    String logFileName = "test_" + transliteratorID + "_"
            + sourceScript + "_" + targetScript + ".html";
    log.logln("Creating log file " + logFileName);
    out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
            new FileOutputStream(logFileName), "UTF8"), 4*1024));
    try {
        // No byte-order mark is written; the META tag declares the encoding.
        out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
        out.println("<HTML><HEAD>");
        out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
        out.println("<BODY>");
        out.println("<TABLE>");
        try {
            test2();
            out.println("</TABLE>");
        } catch (TestTruncated e) {
            // A deliberately shortened run: close the table and record why.
            out.println("</TABLE>" + e.getMessage());
        }
        out.println("</BODY></HTML>");
    } finally {
        // Always release the file, even when test2() fails with an
        // unexpected exception; otherwise the handle leaks and buffered
        // report rows are lost.
        out.close();
    }

    if (errorCount > 0) {
        log.errln(transliteratorID + " errors: " + errorCount + ", see " + logFileName);
    } else {
        log.logln(transliteratorID + " ok");
        new File(logFileName).delete();
    }
}
/**
 * Performs the four checking phases against the configured transliterator
 * and its inverse:
 * single source characters, pairs of source characters, single target
 * characters (with round trip), and pairs of target characters (with
 * round trip). Code units 0 through 0xFFFE are scanned; unassigned ones
 * and those rejected by isSource()/isTarget() are skipped.
 *
 * Throws TestTruncated when the pair limit is exceeded, or (via the
 * logging methods) when the error limit is reached.
 */
public void test2() {
    Transliterator forward = Transliterator.getInstance(transliteratorID);
    Transliterator backward = forward.getInverse();

    log.logln("Checking that source characters convert to target - Singles");
    for (char first = 0; first < 0xFFFF; ++first) {
        if (!TestUtility.isUnassigned(first) && isSource(first)) {
            checkSourceToTarget(forward, String.valueOf(first));
        }
    }

    log.logln("Checking that source characters convert to target - Doubles");
    for (char first = 0; first < 0xFFFF; ++first) {
        if (!TestUtility.isUnassigned(first) && isSource(first)) {
            for (char second = 0; second < 0xFFFF; ++second) {
                if (!TestUtility.isUnassigned(second) && isSource(second)) {
                    checkSourceToTarget(forward,
                            new String(new char[] { first, second }));
                }
            }
        }
    }

    log.logln("Checking that target characters convert to source and back - Singles");
    for (char first = 0; first < 0xFFFF; ++first) {
        if (!TestUtility.isUnassigned(first) && isTarget(first)) {
            checkRoundTrip(forward, backward, String.valueOf(first));
        }
    }

    log.logln("Checking that target characters convert to source and back - Doubles");
    int leadCount = 0;
    char[] pair = new char[2];
    for (char first = 0; first < 0xFFFF; ++first) {
        if (TestUtility.isUnassigned(first) || !isTarget(first)) {
            continue;
        }
        leadCount++;
        if (leadCount > pairLimit) {
            throw new TestTruncated("Test truncated at " + pairLimit + " x 64k pairs");
        }
        pair[0] = first;
        // Progress marker: one hex value per leading character.
        log.log(TestUtility.hex(first));
        for (char second = 0; second < 0xFFFF; ++second) {
            if (TestUtility.isUnassigned(second) || !isTarget(second)) {
                continue;
            }
            pair[1] = second;
            checkRoundTrip(forward, backward, new String(pair));
        }
    }
    log.logln("");
}

/* Transliterates text forward and reports it if the result leaves the target script. */
private void checkSourceToTarget(Transliterator forward, String text) {
    String converted = forward.transliterate(text);
    if (!isReceivingTarget(converted)) {
        logWrongScript("Source-Target", text, converted);
    }
}

/* Transliterates text backward and forward again; reports wrong-script or round-trip failures. */
private void checkRoundTrip(Transliterator forward, Transliterator backward,
        String text) {
    String converted = backward.transliterate(text);
    String restored = forward.transliterate(converted);
    if (!isReceivingSource(converted)) {
        logWrongScript("Target-Source", text, converted);
    } else if (!text.equals(restored)) {
        logRoundTripFailure(text, converted, restored);
    }
}
/**
 * Appends one table row to the HTML report for a conversion whose result
 * was not accepted for the receiving script, then counts the failure.
 *
 * @param label direction label shown in the row
 * @param from  text that was transliterated
 * @param to    result of the transliteration
 * @throws TestTruncated once the failure count reaches the error limit
 */
final void logWrongScript(String label, String from, String to) {
    String labelCell = "<TR><TD>Fail " + label + ":</TD>";
    String fromCell = "<TD><FONT SIZE=\"6\">" + from + "</FONT></TD>";
    String fromHexCell = "<TD>(" + TestUtility.hex(from) + ") =></TD>";
    String toCell = "<TD><FONT SIZE=\"6\">" + to + "</FONT></TD>";
    String toHexCell = "<TD>(" + TestUtility.hex(to) + ")</TD></TR>";
    out.println(labelCell + fromCell + fromHexCell + toCell + toHexCell);

    errorCount++;
    if (errorCount >= errorLimit) {
        throw new TestTruncated("Test truncated; too many failures");
    }
}
/**
 * Appends one table row to the HTML report for text that did not survive
 * the round trip, then counts the failure.
 *
 * @param from original text
 * @param to   result of transliterating {@code from}
 * @param back result of transliterating {@code to} back again
 * @throws TestTruncated once the failure count reaches the error limit
 */
final void logRoundTripFailure(String from, String to, String back) {
    // The FONT element around `back` is now closed; the unclosed tag left
    // the row malformed and inconsistent with logWrongScript().
    out.println("<TR><TD>Fail Roundtrip:</TD><TD><FONT SIZE=\"6\">" +
        from + "</FONT></TD><TD>(" +
        TestUtility.hex(from) + ") =></TD><TD>" +
        to + "</TD><TD>(" +
        TestUtility.hex(to) + ") =></TD><TD><FONT SIZE=\"6\">" +
        back + "</FONT></TD><TD>(" +
        TestUtility.hex(back) + ")</TD></TR>" );
    if (++errorCount >= errorLimit) {
        throw new TestTruncated("Test truncated; too many failures");
    }
}
/**
 * Filter for characters fed to the source-to-target checks.
 * A character qualifies when its script is the source script, it is a
 * letter, and it is contained in sourceRange (which test() defaults to
 * the ASCII letters).
 */
public boolean isSource(char c) {
    return TestUtility.getScript(c) == sourceScript
            && TestUtility.isLetter(c)
            && sourceRange.contains(c);
}
/**
 * Tells whether a character is acceptable in the output of the
 * target-to-source direction: its script must be the source script or
 * the common script.
 */
public boolean isReceivingSource(char c) {
    byte script = TestUtility.getScript(c);
    if (script == sourceScript) {
        return true;
    }
    return script == TestUtility.COMMON_SCRIPT;
}
/**
 * Filter for characters fed to the target-to-source checks.
 * A character qualifies when its script is the target script, it is a
 * letter, and -- if a targetRange has been set -- it is contained in
 * that range.
 */
public boolean isTarget(char c) {
    return TestUtility.getScript(c) == targetScript
            && TestUtility.isLetter(c)
            && (targetRange == null || targetRange.contains(c));
}
/**
 * Tells whether a character is acceptable in the output of the
 * source-to-target direction: its script must be the target script or
 * the common script.
 */
public boolean isReceivingTarget(char c) {
    byte script = TestUtility.getScript(c);
    if (script == targetScript) {
        return true;
    }
    return script == TestUtility.COMMON_SCRIPT;
}
/**
 * Returns true when every character of s passes isSource(char);
 * an empty string yields true.
 */
final boolean isSource(String s) {
    int length = s.length();
    int index = 0;
    while (index < length) {
        if (!isSource(s.charAt(index))) {
            return false;
        }
        index++;
    }
    return true;
}
/**
 * Returns true when every character of s passes isTarget(char);
 * an empty string yields true.
 */
final boolean isTarget(String s) {
    int length = s.length();
    int index = 0;
    while (index < length) {
        if (!isTarget(s.charAt(index))) {
            return false;
        }
        index++;
    }
    return true;
}
/**
 * Returns true when every character of s passes isReceivingSource(char);
 * an empty string yields true.
 */
final boolean isReceivingSource(String s) {
    int length = s.length();
    int index = 0;
    while (index < length) {
        if (!isReceivingSource(s.charAt(index))) {
            return false;
        }
        index++;
    }
    return true;
}
/**
 * Returns true when every character of s passes isReceivingTarget(char);
 * an empty string yields true.
 */
final boolean isReceivingTarget(String s) {
    int length = s.length();
    int index = 0;
    while (index < length) {
        if (!isReceivingTarget(s.charAt(index))) {
            return false;
        }
        index++;
    }
    return true;
}
/**
 * Unchecked exception thrown to cut a test run short, either because the
 * pair limit was exceeded or because too many failures were recorded.
 * test() catches it and writes its message into the HTML report.
 */
static class TestTruncated extends RuntimeException {
    // RuntimeException is Serializable; pin the stream version explicitly.
    private static final long serialVersionUID = 1L;

    /**
     * @param msg explanation of why the run was truncated
     */
    TestTruncated(String msg) {
        super(msg);
    }
}
}
/**
 * Round-trip test for the "Jamo-Hangul" transliterator (Jamo script as
 * source, Hangul script as target) with a narrowed source filter.
 */
static class TestHangul extends Test {
    TestHangul () {
        super("Jamo-Hangul", TestUtility.JAMO_SCRIPT, TestUtility.HANGUL_SCRIPT);
    }

    /**
     * Rejects code units in U+1113..U+1160, U+1176..U+11F9 and
     * U+3131..U+318E, then defers to the inherited filter.
     * NOTE(review): presumably these are jamo the transliterator is not
     * expected to handle -- confirm.
     */
    public boolean isSource(char c) {
        boolean excluded = (c >= 0x1113 && c <= 0x1160)
                || (c >= 0x1176 && c <= 0x11F9)
                || (c >= 0x3131 && c <= 0x318E);
        return !excluded && super.isSource(c);
    }
}
}