blob: ee99ed03532962ef5b8f98e1a381200ba7c8f1b2 [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2009-2014, Google, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.dev.test.translit;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.ULocale;
/**
* @author markdavis
*
*/
public class AnyScriptTest extends TestFmwk {
public static void main(String[] args) throws Exception {
new AnyScriptTest().run(args);
}
public void TestContext() {
Transliterator t = Transliterator.createFromRules("foo", "::[bc]; a{b}d > B;", Transliterator.FORWARD);
String sample = "abd abc b";
assertEquals("context works", "aBd abc b", t.transform(sample));
}
public void TestScripts(){
// get a couple of characters of each script for testing
StringBuffer testBuffer = new StringBuffer();
for (int script = 0; script < UScript.CODE_LIMIT; ++script) {
UnicodeSet test = new UnicodeSet().applyPropertyAlias("script", UScript.getName(script));
int count = Math.min(20, test.size());
for (int i = 0; i < count; ++i){
testBuffer.append(UTF16.valueOf(test.charAt(i)));
}
}
String test = testBuffer.toString();
logln("Test line: " + test);
int inclusion = getInclusion();
boolean testedUnavailableScript = false;
for (int script = 0; script < UScript.CODE_LIMIT; ++script) {
if (script == UScript.COMMON || script == UScript.INHERITED) {
continue;
}
// if the inclusion rate is not 10, skip all but a small number of items.
// Make sure, however, that we test at least one unavailable script
if (inclusion < 10 && script != UScript.LATIN
&& script != UScript.HAN
&& script != UScript.HIRAGANA
&& testedUnavailableScript
) {
continue;
}
String scriptName = UScript.getName(script); // long name
ULocale locale = new ULocale(scriptName);
if (locale.getLanguage().equals("new") || locale.getLanguage().equals("pau")) {
if (logKnownIssue("11171",
"long script name loosely looks like a locale ID with a known likely script")) {
continue;
}
}
Transliterator t;
try {
t = Transliterator.getInstance("any-" + scriptName);
} catch (Exception e) {
testedUnavailableScript = true;
logln("Skipping unavailable: " + scriptName);
continue; // we don't handle all scripts
}
logln("Checking: " + scriptName);
if (t != null) {
t.transform(test); // just verify we don't crash
}
String shortScriptName = UScript.getShortName(script); // 4-letter script code
try {
t = Transliterator.getInstance("any-" + shortScriptName);
} catch (Exception e) {
errln("Transliterator.getInstance() worked for \"any-" + scriptName +
"\" but not for \"any-" + shortScriptName + '\"');
}
t.transform(test); // just verify we don't crash
}
}
/**
* Check to make sure that wide characters are converted when going to narrow scripts.
*/
public void TestForWidth(){
Transliterator widen = Transliterator.getInstance("halfwidth-fullwidth");
Transliterator narrow = Transliterator.getInstance("fullwidth-halfwidth");
UnicodeSet ASCII = new UnicodeSet("[:ascii:]");
String lettersAndSpace = "abc def";
final String punctOnly = "( )";
String wideLettersAndSpace = widen.transform(lettersAndSpace);
String widePunctOnly = widen.transform(punctOnly);
assertContainsNone("Should be wide", ASCII, wideLettersAndSpace);
assertContainsNone("Should be wide", ASCII, widePunctOnly);
String back;
back = narrow.transform(wideLettersAndSpace);
assertEquals("Should be narrow", lettersAndSpace, back);
back = narrow.transform(widePunctOnly);
assertEquals("Should be narrow", punctOnly, back);
Transliterator latin = Transliterator.getInstance("any-Latn");
back = latin.transform(wideLettersAndSpace);
assertEquals("Should be ascii", lettersAndSpace, back);
back = latin.transform(widePunctOnly);
assertEquals("Should be ascii", punctOnly, back);
// Han-Latin is now forward-only per CLDR ticket #5630
//Transliterator t2 = Transliterator.getInstance("any-Han");
//back = t2.transform(widePunctOnly);
//assertEquals("Should be same", widePunctOnly, back);
}
public void TestCommonDigits() {
UnicodeSet westernDigitSet = new UnicodeSet("[0-9]");
UnicodeSet westernDigitSetAndMarks = new UnicodeSet("[[0-9][:Mn:]]");
UnicodeSet arabicDigitSet = new UnicodeSet("[[:Nd:]&[:block=Arabic:]]");
Transliterator latin = Transliterator.getInstance("Any-Latn");
Transliterator arabic = Transliterator.getInstance("Any-Arabic");
String westernDigits = getList(westernDigitSet);
String arabicDigits = getList(arabicDigitSet);
String fromArabic = latin.transform(arabicDigits);
assertContainsAll("Any-Latin transforms Arabic digits", westernDigitSetAndMarks, fromArabic);
if (false) { // we don't require conversion to Arabic digits
String fromLatin = arabic.transform(westernDigits);
assertContainsAll("Any-Arabic transforms Western digits", arabicDigitSet, fromLatin);
}
}
// might want to add to TestFmwk
private void assertContainsAll(String message, UnicodeSet set, String string) {
handleAssert(set.containsAll(string), message, set, string, "contains all of", false);
}
private void assertContainsNone(String message, UnicodeSet set, String string) {
handleAssert(set.containsNone(string), message, set, string, "contains none of", false);
}
// might want to add to UnicodeSet
private String getList(UnicodeSet set) {
StringBuffer result = new StringBuffer();
for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
result.append(it.getString());
}
return result.toString();
}
}