// blob: 077a1a13c960dad614cbafa221de0580c8deac30 [file] [log] [blame]
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/**
*******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.dev.test.lang;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.TestUtil;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.CaseMap;
import com.ibm.icu.text.Edits;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale;
/**
* <p>Testing character casing</p>
* <p>Mostly following the test cases in strcase.cpp for ICU</p>
* @author Syn Wee Quek
* @since march 14 2002
*/
@RunWith(JUnit4.class)
public final class UCharacterCaseTest extends TestFmwk
{
// constructor -----------------------------------------------------------
/**
* Default no-argument constructor. It performs no initialization of its own.
*/
public UCharacterCaseTest()
{
}
// public methods --------------------------------------------------------
/**
 * Checks the single-character case predicates and mappings of UCharacter
 * against the parallel CHARACTER_LOWER_ / CHARACTER_UPPER_ tables.
 * The walk over the tables stops at the first pair that is reported as failing.
 */
@Test
public void TestCharacter()
{
    int i = 0;
    while (i < CHARACTER_LOWER_.length) {
        // A letter taken from the lowercase table must be classified as lowercase.
        boolean lowerClassified = !UCharacter.isLetter(CHARACTER_LOWER_[i])
                || UCharacter.isLowerCase(CHARACTER_LOWER_[i]);
        if (!lowerClassified) {
            errln("FAIL isLowerCase test for \\u" +
                  hex(CHARACTER_LOWER_[i]));
            break;
        }

        // A letter taken from the uppercase table must be uppercase or titlecase.
        boolean upperClassified = !UCharacter.isLetter(CHARACTER_UPPER_[i])
                || UCharacter.isUpperCase(CHARACTER_UPPER_[i])
                || UCharacter.isTitleCase(CHARACTER_UPPER_[i]);
        if (!upperClassified) {
            errln("FAIL isUpperCase test for \\u" +
                  hex(CHARACTER_UPPER_[i]));
            break;
        }

        // upper -> lower must hit the table entry, and lower -> upper (or title) must go back.
        boolean mapsBothWays =
                CHARACTER_LOWER_[i] == UCharacter.toLowerCase(CHARACTER_UPPER_[i])
                && (CHARACTER_UPPER_[i] == UCharacter.toUpperCase(CHARACTER_LOWER_[i])
                    || CHARACTER_UPPER_[i] == UCharacter.toTitleCase(CHARACTER_LOWER_[i]));
        if (!mapsBothWays) {
            errln("FAIL case conversion test for \\u" +
                  hex(CHARACTER_UPPER_[i]) +
                  " to \\u" + hex(CHARACTER_LOWER_[i]));
            break;
        }

        // Lowercasing something that is already lowercase must be a no-op.
        boolean lowerIsStable =
                CHARACTER_LOWER_[i] == UCharacter.toLowerCase(CHARACTER_LOWER_[i]);
        if (!lowerIsStable) {
            errln("FAIL lower case conversion test for \\u" +
                  hex(CHARACTER_LOWER_[i]));
            break;
        }

        // Uppercasing or titlecasing the uppercase entry must be a no-op.
        boolean upperIsStable =
                CHARACTER_UPPER_[i] == UCharacter.toUpperCase(CHARACTER_UPPER_[i])
                || CHARACTER_UPPER_[i] == UCharacter.toTitleCase(CHARACTER_UPPER_[i]);
        if (!upperIsStable) {
            errln("FAIL upper case conversion test for \\u" +
                  hex(CHARACTER_UPPER_[i]));
            break;
        }

        logln("Ok \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" +
              hex(CHARACTER_LOWER_[i]));
        i++;
    }
}
/**
 * Tests UCharacter.foldCase() for single code points and for whole strings.
 * FOLDING_SIMPLE_ is read as triples: source, expected default folding,
 * expected folding with the special-I mappings excluded. Each expectation is
 * checked through both the boolean and the int-option overloads.
 */
@Test
public void TestFolding()
{
    // simple (single code point) case folding
    for (int base = 0; base < FOLDING_SIMPLE_.length; base += 3) {
        boolean boolDefaultOk =
                UCharacter.foldCase(FOLDING_SIMPLE_[base], true) == FOLDING_SIMPLE_[base + 1];
        if (!boolDefaultOk) {
            errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[base]) +
                  ", true) should be \\u" + hex(FOLDING_SIMPLE_[base + 1]));
        }
        boolean optionDefaultOk =
                UCharacter.foldCase(FOLDING_SIMPLE_[base], UCharacter.FOLD_CASE_DEFAULT)
                        == FOLDING_SIMPLE_[base + 1];
        if (!optionDefaultOk) {
            errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[base]) +
                  ", UCharacter.FOLD_CASE_DEFAULT) should be \\u"
                  + hex(FOLDING_SIMPLE_[base + 1]));
        }
        boolean boolExcludeOk =
                UCharacter.foldCase(FOLDING_SIMPLE_[base], false) == FOLDING_SIMPLE_[base + 2];
        if (!boolExcludeOk) {
            errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[base]) +
                  ", false) should be \\u" + hex(FOLDING_SIMPLE_[base + 2]));
        }
        boolean optionExcludeOk =
                UCharacter.foldCase(FOLDING_SIMPLE_[base], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)
                        == FOLDING_SIMPLE_[base + 2];
        if (!optionExcludeOk) {
            errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[base]) +
                  ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u"
                  + hex(FOLDING_SIMPLE_[base + 2]));
        }
    }

    // Full string case folding for the first two mixed-case samples:
    // default handling first, then the alternate handling for
    // dotted I/dotless i (U+0130, U+0131).
    for (int k = 0; k < 2; ++k) {
        String mixed = FOLDING_MIXED_[k];

        String foldedBoolDefault = UCharacter.foldCase(mixed, true);
        if (!FOLDING_DEFAULT_[k].equals(foldedBoolDefault)) {
            errln("FAIL: foldCase(" + prettify(mixed) +
                  ", true)=" + prettify(foldedBoolDefault) +
                  " should be " + prettify(FOLDING_DEFAULT_[k]));
        }

        String foldedOptionDefault = UCharacter.foldCase(mixed, UCharacter.FOLD_CASE_DEFAULT);
        if (!FOLDING_DEFAULT_[k].equals(foldedOptionDefault)) {
            errln("FAIL: foldCase(" + prettify(mixed) +
                  ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(foldedOptionDefault)
                  + " should be " + prettify(FOLDING_DEFAULT_[k]));
        }

        String foldedBoolExclude = UCharacter.foldCase(mixed, false);
        if (!FOLDING_EXCLUDE_SPECIAL_I_[k].equals(foldedBoolExclude)) {
            errln("FAIL: foldCase(" + prettify(mixed) +
                  ", false)=" + prettify(foldedBoolExclude)
                  + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[k]));
        }

        String foldedOptionExclude =
                UCharacter.foldCase(mixed, UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I);
        if (!FOLDING_EXCLUDE_SPECIAL_I_[k].equals(foldedOptionExclude)) {
            errln("FAIL: foldCase(" + prettify(mixed) +
                  ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(foldedOptionExclude)
                  + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[k]));
        }
    }
}
/**
 * Asserts that UCharacter.foldCase() returns its argument unchanged for
 * surrogates, noncharacters, and values outside the code point range,
 * through both the boolean and the int-option overloads.
 */
@Test
public void TestInvalidCodePointFolding() {
    final String message = "Invalid code points should be echoed back";
    final int[] invalidCodePoints = {
        0xD800,     // lead surrogate
        0xDFFF,     // trail surrogate
        0xFDD0,     // noncharacter
        0xFFFF,     // noncharacter
        0x110000,   // out of range
        -1          // negative
    };
    for (int k = 0; k < invalidCodePoints.length; ++k) {
        final int cp = invalidCodePoints[k];
        assertEquals(message, cp, UCharacter.foldCase(cp, true));
        assertEquals(message, cp, UCharacter.foldCase(cp, false));
        assertEquals(message, cp, UCharacter.foldCase(cp, UCharacter.FOLD_CASE_DEFAULT));
        assertEquals(message, cp, UCharacter.foldCase(cp, UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I));
    }
}
/**
 * Tests the string uppercasing overloads of UCharacter: the one without a
 * locale argument and the one taking an explicit locale (Turkish, German).
 * Each result is computed once and reused in the failure message.
 */
@Test
public void TestUpper()
{
    // uppercase without an explicit locale
    String upperPlain = UCharacter.toUpperCase(UPPER_BEFORE_);
    if (!UPPER_ROOT_.equals(upperPlain)) {
        errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " +
              UPPER_ROOT_ + " instead got " + upperPlain);
    }

    // uppercase with turkish locale
    String upperTurkish = UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_);
    if (!UPPER_TURKISH_.equals(upperTurkish)) {
        errln("Fail " + UPPER_BEFORE_ +
              " after turkish-sensitive uppercase should be " +
              UPPER_TURKISH_ + " instead of " + upperTurkish);
    }

    // uppercase a short string without an explicit locale
    if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) {
        errln("error in toUpper(root locale)=\"" + UPPER_MINI_ +
              "\" expected \"" + UPPER_MINI_UPPER_ + "\"");
    }

    String topkapPlain = UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_);
    if (!SHARED_UPPERCASE_TOPKAP_.equals(topkapPlain)) {
        errln("toUpper failed: expected \"" +
              SHARED_UPPERCASE_TOPKAP_ + "\", got \"" + topkapPlain + "\".");
    }

    String topkapTurkish = UCharacter.toUpperCase(TURKISH_LOCALE_, SHARED_LOWERCASE_TOPKAP_);
    if (!SHARED_UPPERCASE_TURKISH_.equals(topkapTurkish)) {
        errln("toUpper failed: expected \"" +
              SHARED_UPPERCASE_TURKISH_ + "\", got \"" + topkapTurkish + "\".");
    }

    String german = UCharacter.toUpperCase(GERMAN_LOCALE_, SHARED_LOWERCASE_GERMAN_);
    if (!SHARED_UPPERCASE_GERMAN_.equals(german)) {
        errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_
              + "\", got \"" + german + "\".");
    }

    String greek = UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_);
    if (!SHARED_UPPERCASE_GREEK_.equals(greek)) {
        // This check exercises toUpperCase(); the message used to say "toLower".
        errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GREEK_ +
              "\", got \"" + greek + "\".");
    }
}
/**
 * Tests the string lowercasing overloads of UCharacter: the one without a
 * locale argument and the one taking an explicit locale (Turkish, Greek).
 */
@Test
public void TestLower()
{
    // lowercase without an explicit locale
    String lowerPlain = UCharacter.toLowerCase(LOWER_BEFORE_);
    if (!LOWER_ROOT_.equals(lowerPlain)) {
        errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " +
              LOWER_ROOT_ + " instead of " + lowerPlain);
    }

    // lowercase with turkish locale
    String lowerTurkish = UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_);
    if (!LOWER_TURKISH_.equals(lowerTurkish)) {
        errln("Fail " + LOWER_BEFORE_ +
              " after turkish-sensitive lowercase should be " +
              LOWER_TURKISH_ + " instead of " + lowerTurkish);
    }

    String istanbulPlain = UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_);
    if (!SHARED_LOWERCASE_ISTANBUL_.equals(istanbulPlain)) {
        errln("1. toLower failed: expected \"" +
              SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" + istanbulPlain + "\".");
    }

    String istanbulTurkish =
            UCharacter.toLowerCase(TURKISH_LOCALE_, SHARED_UPPERCASE_ISTANBUL_);
    if (!SHARED_LOWERCASE_TURKISH_.equals(istanbulTurkish)) {
        errln("2. toLower failed: expected \"" +
              SHARED_LOWERCASE_TURKISH_ + "\", got \"" + istanbulTurkish + "\".");
    }

    String greek = UCharacter.toLowerCase(GREEK_LOCALE_, SHARED_UPPERCASE_GREEK_);
    if (!SHARED_LOWERCASE_GREEK_.equals(greek)) {
        errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ +
              "\", got \"" + greek + "\".");
    }
}
/**
 * Regression check: the ASCII apostrophe must have the Case_Ignorable
 * property, and English titlecasing must not capitalize the letter after it.
 */
@Test
public void TestTitleRegression() throws java.io.IOException {
    assertTrue("Case Ignorable check of ASCII apostrophe",
               UCharacter.hasBinaryProperty('\'', UProperty.CASE_IGNORABLE));

    final String shouting = "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.";
    final String titled = "The Quick Brown Fox Can't Jump Over The Lazy Dogs.";
    assertEquals("Titlecase check",
                 titled,
                 UCharacter.toTitleCase(ULocale.ENGLISH, shouting, null));
}
/**
 * Data-driven titlecasing test over TITLE_DATA_.
 * The table is consumed five strings at a time: input, expected output,
 * locale ID, break iterator type (parsed as an int), and an options string.
 * <ul>
 * <li>break type &gt;= 0: BreakIterator.getBreakInstance(locale, type)</li>
 * <li>break type -2: a trivial RuleBasedBreakIterator built from ".*;"</li>
 * <li>anything else: no iterator (null)</li>
 * </ul>
 * An 'L' in the options string sets TITLECASE_NO_LOWERCASE and an 'A' sets
 * TITLECASE_NO_BREAK_ADJUSTMENT. When no option is set, the overload without
 * an options argument is checked as well.
 */
@Test
public void TestTitle()
{
    try {
        for (int i = 0; i < TITLE_DATA_.length;) {
            String test = TITLE_DATA_[i++];
            String expected = TITLE_DATA_[i++];
            ULocale locale = new ULocale(TITLE_DATA_[i++]);
            int breakType = Integer.parseInt(TITLE_DATA_[i++]);
            String optionsString = TITLE_DATA_[i++];

            BreakIterator iter;
            if (breakType >= 0) {
                iter = BreakIterator.getBreakInstance(locale, breakType);
            } else if (breakType == -2) {
                // Open a trivial break iterator that only delivers { 0, length }
                // or even just { 0 } as boundaries.
                iter = new RuleBasedBreakIterator(".*;");
            } else {
                iter = null;
            }

            int options = 0;
            if (optionsString.indexOf('L') >= 0) {
                options |= UCharacter.TITLECASE_NO_LOWERCASE;
            }
            if (optionsString.indexOf('A') >= 0) {
                options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;
            }

            String result = UCharacter.toTitleCase(locale, test, iter, options);
            if (!expected.equals(result)) {
                errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " +
                      prettify(expected) + " but got " +
                      prettify(result));
            }
            if (options == 0) {
                result = UCharacter.toTitleCase(locale, test, iter);
                if (!expected.equals(result)) {
                    errln("titlecasing for " + prettify(test) + " should be " +
                          prettify(expected) + " but got " +
                          prettify(result));
                }
            }
        }
    } catch (Exception ex) {
        // Still best-effort, but report what actually went wrong: this handler
        // also catches malformed table entries (NumberFormatException, index
        // errors), which would otherwise look like missing break iterator data.
        warnln("Could not find data for BreakIterators: " + ex);
    }
}
// Not a @Test. See ICU4C intltest strcase.cpp TestCasingImpl().
// Applies the given titlecasing configuration to input for the given locale
// (no break iterator, no Edits) and asserts that the result equals output.
void TestCasingImpl(String input, String output, CaseMap.Title toTitle, Locale locale) {
    final String label = "toTitle(" + input + ')';
    final String actual =
            toTitle.apply(locale, null, input, new StringBuilder(), null).toString();
    assertEquals(label, output, actual);
}
/**
 * Exercises the CaseMap.Title option setters (wholeString, sentences,
 * noLowercase, adjustToCased, noBreakAdjustment) and checks that
 * conflicting combinations are rejected with an IllegalArgumentException.
 */
@Test
public void TestTitleOptions() {
    final Locale root = Locale.ROOT;
    final CaseMap.Title wholeString = CaseMap.toTitle().wholeString();

    // New options in ICU 60.
    TestCasingImpl("ʻcAt! ʻeTc.", "ʻCat! ʻetc.", wholeString, root);
    TestCasingImpl("a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCaT. A ʻdOg! ʻETc.",
                   CaseMap.toTitle().sentences().noLowercase(), root);
    TestCasingImpl("49eRs", "49ers", wholeString, root);
    TestCasingImpl("«丰(aBc)»", "«丰(abc)»", wholeString, root);
    TestCasingImpl("49eRs", "49Ers", wholeString.adjustToCased(), root);
    TestCasingImpl("«丰(aBc)»", "«丰(Abc)»", wholeString.adjustToCased(), root);
    TestCasingImpl(" john. Smith", " John. Smith", wholeString.noLowercase(), root);
    TestCasingImpl(" john. Smith", " john. smith", wholeString.noBreakAdjustment(), root);
    TestCasingImpl("«ijs»", "«IJs»", wholeString, new Locale("nl", "BE"));
    TestCasingImpl("«ijs»", "«İjs»", wholeString, new Locale("tr", "DE"));

    // Test conflicting settings.
    // If & when we add more options, then the ORed combinations may become
    // indistinguishable from valid values.

    // Two index-adjustment options at once.
    try {
        CaseMap.toTitle().noBreakAdjustment().adjustToCased()
                .apply(root, null, "", new StringBuilder(), null);
        fail("CaseMap.toTitle(multiple adjustment options) " +
             "did not throw an IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // this is the outcome the test wants
    }

    // Two iterator-selection options at once.
    try {
        CaseMap.toTitle().wholeString().sentences()
                .apply(root, null, "", new StringBuilder(), null);
        fail("CaseMap.toTitle(multiple iterator options) " +
             "did not throw an IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // this is the outcome the test wants
    }

    // An iterator-selection option together with an explicit iterator.
    BreakIterator charIter = BreakIterator.getCharacterInstance(root);
    try {
        CaseMap.toTitle().wholeString().apply(root, charIter, "", new StringBuilder(), null);
        fail("CaseMap.toTitle(iterator option + iterator) " +
             "did not throw an IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // this is the outcome the test wants
    }
}
/**
 * Checks Lithuanian titlecasing of "X" + "I" + U+0308: the lowercased i is
 * expected to gain U+0307 before the diaeresis. The same input is checked
 * once with a locally built ULocale("lt") and once with LITHUANIAN_LOCALE_.
 * The two assertions carry different messages so a failure identifies which
 * locale object was in use.
 */
@Test
public void TestLithuanianTitle() {
    final String input = "\u0058\u0049\u0308";
    final String expected = "\u0058\u0069\u0307\u0308";

    ULocale LOC_LITHUANIAN = new ULocale("lt");
    assertEquals("Lithuanian titlecase check in Lithuanian",
                 expected,
                 UCharacter.toTitleCase(LOC_LITHUANIAN, input, null));
    assertEquals("Lithuanian titlecase check in Lithuanian (LITHUANIAN_LOCALE_)",
                 expected,
                 UCharacter.toTitleCase(LITHUANIAN_LOCALE_, input, null));
}
/**
 * Checks the Dutch "ij" titlecasing: in Dutch a word-initial "ij" becomes
 * "IJ", in English only the first letter is capitalized. Both the ULocale
 * and the Java Locale overloads are covered, plus one run with an explicit
 * word break iterator and TITLECASE_NO_LOWERCASE.
 */
@Test
public void TestDutchTitle() {
    final ULocale dutch = new ULocale("nl");
    final int noLowercase = UCharacter.TITLECASE_NO_LOWERCASE;
    final BreakIterator wordBreaks = BreakIterator.getWordInstance(dutch);

    final String plainInput = "ijssel igloo IJMUIDEN";
    final String englishTitle = "Ijssel Igloo Ijmuiden";
    final String dutchTitle = "IJssel Igloo IJmuiden";

    assertEquals("Dutch titlecase check in English",
                 englishTitle,
                 UCharacter.toTitleCase(ULocale.ENGLISH, plainInput, null));
    assertEquals("Dutch titlecase check in Dutch",
                 dutchTitle,
                 UCharacter.toTitleCase(dutch, plainInput, null));

    // Also check the behavior using Java Locale
    assertEquals("Dutch titlecase check in English (Java Locale)",
                 englishTitle,
                 UCharacter.toTitleCase(Locale.ENGLISH, plainInput, null));
    assertEquals("Dutch titlecase check in Dutch (Java Locale)",
                 dutchTitle,
                 UCharacter.toTitleCase(DUTCH_LOCALE_, plainInput, null));

    final String mixedInput = "ijssel igloo IjMUIdEN iPoD ijenough";
    wordBreaks.setText(mixedInput);
    assertEquals("Dutch titlecase check in Dutch with nolowercase option",
                 "IJssel Igloo IJMUIdEN IPoD IJenough",
                 UCharacter.toTitleCase(dutch, mixedInput, wordBreaks, noLowercase));
}
/**
 * Tests locale-sensitive special casing.
 * SPECIAL_DATA_ is read as triples (input, expected lowercase, expected
 * uppercase), one triple per entry of SPECIAL_LOCALES_. A null locale
 * entry selects the overloads without a locale argument. The remaining
 * checks cover dot handling for Turkish, German and Lithuanian.
 * Expected and actual values in the table-driven messages are both
 * hex-encoded so they can be compared directly.
 */
@Test
public void TestSpecial()
{
    for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) {
        int j = i * 3;
        Locale locale = SPECIAL_LOCALES_[i];
        String str = SPECIAL_DATA_[j];
        String expectedLower = SPECIAL_DATA_[j + 1];
        String expectedUpper = SPECIAL_DATA_[j + 2];
        // Only mention the locale in the message when one was used.
        String localeNote = (locale != null) ? " for locale " + locale.toString() : "";

        String lower = (locale != null)
                ? UCharacter.toLowerCase(locale, str)
                : UCharacter.toLowerCase(str);
        if (!expectedLower.equals(lower)) {
            errln("error lowercasing special characters " +
                  hex(str) + " expected " + hex(expectedLower)
                  + localeNote + " but got " + hex(lower));
        }

        String upper = (locale != null)
                ? UCharacter.toUpperCase(locale, str)
                : UCharacter.toUpperCase(str);
        if (!expectedUpper.equals(upper)) {
            errln("error uppercasing special characters " +
                  hex(str) + " expected " + hex(expectedUpper)
                  + localeNote + " but got " + hex(upper));
        }
    }

    // turkish & azerbaijani dotless i & dotted I
    // remove dot above if there was a capital I before and there are no
    // more accents above
    checkDotCase("dots.toLower(tr)", SPECIAL_DOTTED_, SPECIAL_DOTTED_LOWER_TURKISH_,
                 UCharacter.toLowerCase(TURKISH_LOCALE_, SPECIAL_DOTTED_));
    checkDotCase("dots.toLower(de)", SPECIAL_DOTTED_, SPECIAL_DOTTED_LOWER_GERMAN_,
                 UCharacter.toLowerCase(GERMAN_LOCALE_, SPECIAL_DOTTED_));

    // lithuanian dot above in uppercasing
    checkDotCase("dots.toUpper(lt)", SPECIAL_DOT_ABOVE_, SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_,
                 UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_));
    checkDotCase("dots.toUpper(de)", SPECIAL_DOT_ABOVE_, SPECIAL_DOT_ABOVE_UPPER_GERMAN_,
                 UCharacter.toUpperCase(GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_));

    // lithuanian adds dot above to i in lowercasing if there are more
    // above accents
    checkDotCase("dots.toLower(lt)", SPECIAL_DOT_ABOVE_UPPER_, SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_,
                 UCharacter.toLowerCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_UPPER_));
    checkDotCase("dots.toLower(de)", SPECIAL_DOT_ABOVE_UPPER_, SPECIAL_DOT_ABOVE_LOWER_GERMAN_,
                 UCharacter.toLowerCase(GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_UPPER_));
}

// Reports an error in the "error in <call>=..." format used by TestSpecial()
// when the actual case mapping result differs from the expected one.
private void checkDotCase(String call, String input, String expected, String actual) {
    if (!expected.equals(actual)) {
        errln("error in " + call + "=\"" + input +
              "\" expected \"" + expected +
              "\" but got " + actual);
    }
}
/**
 * Tests the case mappings listed in SpecialCasing.txt.
 * The file is read through TestUtil.getDataReader("unicode/SpecialCasing.txt").
 * Empty lines and lines starting with '#' are skipped. Every other line is
 * split by getUnicodeStrings(): element 0 is the source string, element 1
 * the expected lowercase, element 3 the expected uppercase, and elements
 * from index 4 on are a language code and/or context conditions. For each
 * condition the source and expected strings get the surrounding characters
 * needed to trigger that context before the mappings are checked.
 * The reader is closed on every exit path. Any exception is only printed,
 * as before.
 */
@Test
public void TestSpecialCasingTxt()
{
    try (BufferedReader input = TestUtil.getDataReader(
            "unicode/SpecialCasing.txt")) {
        String s;
        while ((s = input.readLine()) != null) {
            if (s.length() == 0 || s.charAt(0) == '#') {
                continue;
            }
            String[] chstr = getUnicodeStrings(s);
            StringBuilder strbuffer   = new StringBuilder(chstr[0]);
            StringBuilder lowerbuffer = new StringBuilder(chstr[1]);
            StringBuilder upperbuffer = new StringBuilder(chstr[3]);
            Locale locale = null;
            for (int i = 4; i < chstr.length; i ++) {
                String condition = chstr[i];
                if (Character.isLowerCase(chstr[i].charAt(0))) {
                    // specified locale
                    locale = new Locale(chstr[i], "");
                }
                else if (condition.equalsIgnoreCase("Not_Before_Dot")) {
                    // turns I into dotless i
                }
                else if (condition.equalsIgnoreCase("More_Above")) {
                    strbuffer.append((char)0x300);
                    lowerbuffer.append((char)0x300);
                    upperbuffer.append((char)0x300);
                }
                else if (condition.equalsIgnoreCase("After_Soft_Dotted")) {
                    strbuffer.insert(0, 'i');
                    lowerbuffer.insert(0, 'i');
                    String lang = "";
                    if (locale != null) {
                        lang = locale.getLanguage();
                    }
                    if (lang.equals("tr") || lang.equals("az")) {
                        // this is to be removed when 4.0 data comes out
                        // and upperbuffer.insert uncommented
                        // see jitterbug 2344
                        // Undo the insertions above, rewrite the condition to
                        // After_I and step back so the loop's i++ revisits
                        // this same element under the new condition.
                        chstr[i] = "After_I";
                        strbuffer.deleteCharAt(0);
                        lowerbuffer.deleteCharAt(0);
                        i --;
                        continue;
                        // upperbuffer.insert(0, '\u0130');
                    }
                    else {
                        upperbuffer.insert(0, 'I');
                    }
                }
                else if (condition.equalsIgnoreCase("Final_Sigma")) {
                    strbuffer.insert(0, 'c');
                    lowerbuffer.insert(0, 'c');
                    upperbuffer.insert(0, 'C');
                }
                else if (condition.equalsIgnoreCase("After_I")) {
                    strbuffer.insert(0, 'I');
                    lowerbuffer.insert(0, 'i');
                    String lang = "";
                    if (locale != null) {
                        lang = locale.getLanguage();
                    }
                    if (lang.equals("tr") || lang.equals("az")) {
                        upperbuffer.insert(0, 'I');
                    }
                }
            }
            chstr[0] = strbuffer.toString();
            chstr[1] = lowerbuffer.toString();
            chstr[3] = upperbuffer.toString();

            // Without a language code use the overloads that take no locale.
            String lower = (locale == null)
                    ? UCharacter.toLowerCase(chstr[0])
                    : UCharacter.toLowerCase(locale, chstr[0]);
            if (!lower.equals(chstr[1])) {
                errln(s);
                errln("Fail: toLowerCase for character " +
                      Utility.escape(chstr[0]) + ", expected "
                      + Utility.escape(chstr[1]) + " but resulted in " +
                      Utility.escape(lower));
            }

            String upper = (locale == null)
                    ? UCharacter.toUpperCase(chstr[0])
                    : UCharacter.toUpperCase(locale, chstr[0]);
            if (!upper.equals(chstr[3])) {
                errln(s);
                errln("Fail: toUpperCase for character " +
                      Utility.escape(chstr[0]) + ", expected "
                      + Utility.escape(chstr[3]) + " but resulted in " +
                      Utility.escape(upper));
            }
        }
    }
    catch (Exception e)
    {
        // NOTE(review): best-effort handling kept from the original. A missing
        // or unreadable data file only prints a stack trace and does not fail
        // the test; confirm whether that is intended.
        e.printStackTrace();
    }
}
/**
 * Tests single-character upper/lower mappings: letterlike symbols that must
 * map to themselves, a small table of code points with their expected
 * lowercase, and two parallel ASCII strings that must map onto each other.
 */
@Test
public void TestUpperLower()
{
    final int[] sources = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8,
                           0x01c9, 0x000c};
    final int[] lowered = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9,
                           0x01c9, 0x000c};
    final String smallText = "abcdefg123hij.?:klmno";
    final String capitalText = "ABCDEFG123HIJ.?:KLMNO";

    // Checks LetterLike Symbols which were previously a source of
    // confusion [Bertrand A. D. 02/04/98]
    for (int c = 0x2100; c < 0x2138; c ++) {
        /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
        boolean hasCaseMapping =
                c == 0x2126 || c == 0x212a || c == 0x212b || c == 0x2132;
        if (hasCaseMapping) {
            continue;
        }
        if (c != UCharacter.toLowerCase(c)) { // itself
            errln("Failed case conversion with itself: \\u"
                  + Utility.hex(c, 4));
        }
        if (c != UCharacter.toUpperCase(c)) {
            errln("Failed case conversion with itself: \\u"
                  + Utility.hex(c, 4));
        }
    }

    for (int k = 0; k < sources.length; k ++) {
        if (UCharacter.toLowerCase(sources[k]) != lowered[k]) {
            errln("FAILED UCharacter.tolower() for \\u"
                  + Utility.hex(sources[k], 4)
                  + " Expected \\u" + Utility.hex(lowered[k], 4)
                  + " Got \\u"
                  + Utility.hex(UCharacter.toLowerCase(sources[k]), 4));
        }
    }

    logln("testing upper lower");
    for (int k = 0; k < smallText.length(); k ++) {
        logln("testing to upper to lower");
        final char small = smallText.charAt(k);
        final char capital = capitalText.charAt(k);
        // At most one failure is reported per position.
        if (UCharacter.isLetter(small) && !UCharacter.isLowerCase(small)) {
            errln("Failed isLowerCase test at \\u"
                  + Utility.hex(small, 4));
        }
        else if (UCharacter.isLetter(capital) && !UCharacter.isUpperCase(capital)) {
            errln("Failed isUpperCase test at \\u"
                  + Utility.hex(capital, 4));
        }
        else if (small != UCharacter.toLowerCase(capital)) {
            errln("Failed case conversion from \\u"
                  + Utility.hex(capital, 4) + " To \\u"
                  + Utility.hex(small, 4));
        }
        else if (capital != UCharacter.toUpperCase(small)) {
            errln("Failed case conversion : \\u"
                  + Utility.hex(small, 4) + " To \\u"
                  + Utility.hex(capital, 4));
        }
        else if (small != UCharacter.toLowerCase(small)) {
            errln("Failed case conversion with itself: \\u"
                  + Utility.hex(small));
        }
        else if (capital != UCharacter.toUpperCase(capital)) {
            errln("Failed case conversion with itself: \\u"
                  + Utility.hex(capital));
        }
    }
    logln("done testing upper Lower");
}
/**
 * Asserts that uppercasing s with GREEK_LOCALE_ yields expected.
 */
private void assertGreekUpper(String s, String expected) {
    final String label = "toUpper/Greek(" + s + ')';
    final String actual = UCharacter.toUpperCase(GREEK_LOCALE_, s);
    assertEquals(label, expected, actual);
}
/**
 * Table-driven check of Greek uppercasing. Each row is
 * { input, expected uppercase } and is passed to assertGreekUpper() in order.
 */
@Test
public void TestGreekUpper() {
    final String[][] cases = {
        // http://bugs.icu-project.org/trac/ticket/5456
        { "άδικος, κείμενο, ίριδα", "ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ" },
        // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
        // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
        { "Πατάτα", "ΠΑΤΑΤΑ" },
        { "Αέρας, Μυστήριο, Ωραίο", "ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ" },
        { "Μαΐου, Πόρος, Ρύθμιση", "ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ" },
        { "ΰ, Τηρώ, Μάιος", "Ϋ, ΤΗΡΩ, ΜΑΪΟΣ" },
        { "άυλος", "ΑΫΛΟΣ" },
        { "ΑΫΛΟΣ", "ΑΫΛΟΣ" },
        { "Άκλιτα ρήματα ή άκλιτες μετοχές", "ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ" },
        // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
        { "Επειδή η αναγνώριση της αξιοπρέπειας", "ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ" },
        { "νομικού ή διεθνούς", "ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ" },
        // http://unicode.org/udhr/d/udhr_ell_polytonic.html
        { "Ἐπειδὴ ἡ ἀναγνώριση", "ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ" },
        { "νομικοῦ ἢ διεθνοῦς", "ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ" },
        // From Google bug report
        { "Νέο, Δημιουργία", "ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ" },
        // http://crbug.com/234797
        { "Ελάτε να φάτε τα καλύτερα παϊδάκια!", "ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!" },
        { "Μαΐου, τρόλεϊ", "ΜΑΪΟΥ, ΤΡΟΛΕΪ" },
        { "Το ένα ή το άλλο.", "ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ." },
        // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
        { "ρωμέικα", "ΡΩΜΕΪΚΑ" },
        { "ή.", "Ή." },
    };
    for (String[] row : cases) {
        assertGreekUpper(row[0], row[1]);
    }
}
/**
 * One expected edit span: whether it is a change, and its length
 * in the source (oldLength) and in the destination (newLength).
 */
private static final class EditChange {
    private boolean change;
    private int oldLength;
    private int newLength;

    EditChange(boolean isChange, int oldSpanLength, int newSpanLength) {
        change = isChange;
        oldLength = oldSpanLength;
        newLength = newSpanLength;
    }
}
/**
 * Formats the iterator's current span as "old->new" for a change
 * or "old=new" for an unchanged span.
 */
private static String printOneEdit(Edits.Iterator ei) {
    final String separator = ei.hasChange() ? "->" : "=";
    return "" + ei.oldLength() + separator + ei.newLength();
}
/**
 * Reference implementation: maps a destination index to a source index
 * using only the expected edits.
 * A destination index can occur multiple times when there are source deletions.
 * The last occurrence wins (normally a non-empty destination span), so the
 * spans are walked from the back.
 */
private static int srcIndexFromDest(
        EditChange expected[], int srcLength, int destLength, int index) {
    int srcEnd = srcLength;
    int destEnd = destLength;
    for (int k = expected.length - 1; k >= 0 && index < destEnd; --k) {
        final EditChange span = expected[k];
        final int srcStart = srcEnd - span.oldLength;
        final int destStart = destEnd - span.newLength;
        if (index == destStart) {
            return srcStart;
        }
        if (index > destStart) {
            // Inside a change span map to its end;
            // inside an unchanged span keep the offset.
            return span.change ? srcEnd : srcStart + (index - destStart);
        }
        srcEnd = srcStart;
        destEnd = destStart;
    }
    // index is outside the string.
    return srcEnd;
}
/**
 * Reference implementation: maps a source index to a destination index
 * using only the expected edits, walking the spans from the back.
 */
private static int destIndexFromSrc(
        EditChange expected[], int srcLength, int destLength, int index) {
    int srcEnd = srcLength;
    int destEnd = destLength;
    for (int k = expected.length - 1; k >= 0 && index < srcEnd; --k) {
        final EditChange span = expected[k];
        final int srcStart = srcEnd - span.oldLength;
        final int destStart = destEnd - span.newLength;
        if (index == srcStart) {
            return destStart;
        }
        if (index > srcStart) {
            // Inside a change span map to its end;
            // inside an unchanged span keep the offset.
            return span.change ? destEnd : destStart + (index - srcStart);
        }
        srcEnd = srcStart;
        destEnd = destStart;
    }
    // index is outside the string.
    return destEnd;
}
/**
 * Steps the fine-grained iterators of both Edits objects in lockstep and
 * asserts that they report the same next() result and the same printed
 * span at every step, including the step on which either one ends.
 */
private void checkEqualEdits(String name, Edits e1, Edits e2) {
    final Edits.Iterator first = e1.getFineIterator();
    final Edits.Iterator second = e2.getFineIterator();
    int step = 0;
    boolean firstHasNext;
    boolean secondHasNext;
    do {
        firstHasNext = first.next();
        secondHasNext = second.next();
        assertEquals(name + " next()[" + step + "]", firstHasNext, secondHasNext);
        assertEquals(name + " edit[" + step + "]", printOneEdit(first), printOneEdit(second));
        ++step;
    } while (firstHasNext && secondHasNext);
}
/**
 * Verifies two equal Edits iterators against a list of expected edit spans.
 * <p>ei1 is advanced with next() and each reported span is compared with the
 * expected one. ei2 is positioned with findSourceIndex() and
 * findDestinationIndex() and must report the same span. Afterwards the index
 * mapping methods of ei2 are compared with the reference implementations
 * destIndexFromSrc() and srcIndexFromDest().
 *
 * @param name label used as the prefix of every assertion message
 * @param ei1 iterator that is stepped sequentially with next()
 * @param ei2 iterator that is used for the find and index mapping calls
 * @param expected the expected spans, in order
 * @param withUnchanged if false, ei1.next() is only expected to report the
 *        change spans; unchanged spans in expected are then skipped for ei1
 */
private static void checkEditsIter(
String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators
EditChange[] expected, boolean withUnchanged) {
// Index -1 must not be found in either direction.
assertFalse(name, ei2.findSourceIndex(-1));
assertFalse(name, ei2.findDestinationIndex(-1));
// Running expected start positions of the current span in the source,
// in the destination, and in the replacement text.
int expSrcIndex = 0;
int expDestIndex = 0;
int expReplIndex = 0;
for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
EditChange expect = expected[expIndex];
String msg = name + ' ' + expIndex;
// Sequential iteration: ei1 must report this span next.
if (withUnchanged || expect.change) {
assertTrue(msg, ei1.next());
assertEquals(msg, expect.change, ei1.hasChange());
assertEquals(msg, expect.oldLength, ei1.oldLength());
assertEquals(msg, expect.newLength, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
}
// Lookup by source index; only checked for spans with a non-empty source.
if (expect.oldLength > 0) {
assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
assertEquals(msg, expect.change, ei2.hasChange());
assertEquals(msg, expect.oldLength, ei2.oldLength());
assertEquals(msg, expect.newLength, ei2.newLength());
assertEquals(msg, expSrcIndex, ei2.sourceIndex());
assertEquals(msg, expDestIndex, ei2.destinationIndex());
assertEquals(msg, expReplIndex, ei2.replacementIndex());
if (!withUnchanged) {
// For some iterators, move past the current range
// so that findSourceIndex() has to look before the current index.
ei2.next();
ei2.next();
}
}
// Lookup by destination index; only checked for spans with a non-empty destination.
if (expect.newLength > 0) {
assertTrue(msg, ei2.findDestinationIndex(expDestIndex));
assertEquals(msg, expect.change, ei2.hasChange());
assertEquals(msg, expect.oldLength, ei2.oldLength());
assertEquals(msg, expect.newLength, ei2.newLength());
assertEquals(msg, expSrcIndex, ei2.sourceIndex());
assertEquals(msg, expDestIndex, ei2.destinationIndex());
assertEquals(msg, expReplIndex, ei2.replacementIndex());
if (!withUnchanged) {
// For some iterators, move past the current range
// so that findDestinationIndex() has to look before the current index.
ei2.next();
ei2.next();
}
}
expSrcIndex += expect.oldLength;
expDestIndex += expect.newLength;
// The replacement index only advances across change spans.
if (expect.change) {
expReplIndex += expect.newLength;
}
}
// After the last expected span: ei1 must be exhausted and report an empty,
// unchanged span located at the end positions.
String msg = name + " end";
assertFalse(msg, ei1.next());
assertFalse(msg, ei1.hasChange());
assertEquals(msg, 0, ei1.oldLength());
assertEquals(msg, 0, ei1.newLength());
assertEquals(msg, expSrcIndex, ei1.sourceIndex());
assertEquals(msg, expDestIndex, ei1.destinationIndex());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
// The end positions themselves must not be found.
assertFalse(name, ei2.findSourceIndex(expSrcIndex));
assertFalse(name, ei2.findDestinationIndex(expDestIndex));
// Check mapping of all indexes against a simple implementation
// that works on the expected changes.
// Iterate once forward, once backward, to cover more runtime conditions.
int srcLength = expSrcIndex;
int destLength = expDestIndex;
List<Integer> srcIndexes = new ArrayList<>();
List<Integer> destIndexes = new ArrayList<>();
// Start with an index before the string.
srcIndexes.add(-1);
destIndexes.add(-1);
int srcIndex = 0;
int destIndex = 0;
// For each non-empty span collect its first index, its second index
// (if the span is longer than 1), and its last index (if longer than 2).
for (int i = 0; i < expected.length; ++i) {
if (expected[i].oldLength > 0) {
srcIndexes.add(srcIndex);
if (expected[i].oldLength > 1) {
srcIndexes.add(srcIndex + 1);
if (expected[i].oldLength > 2) {
srcIndexes.add(srcIndex + expected[i].oldLength - 1);
}
}
}
if (expected[i].newLength > 0) {
destIndexes.add(destIndex);
if (expected[i].newLength > 1) {
destIndexes.add(destIndex + 1);
if (expected[i].newLength > 2) {
destIndexes.add(destIndex + expected[i].newLength - 1);
}
}
}
srcIndex += expected[i].oldLength;
destIndex += expected[i].newLength;
}
// Finish with the end of the string and one index past it.
srcIndexes.add(srcLength);
destIndexes.add(destLength);
srcIndexes.add(srcLength + 1);
destIndexes.add(destLength + 1);
// Destination indexes are visited from the back; source indexes from the front.
Collections.reverse(destIndexes);
// Zig-zag across the indexes to stress next() <-> previous().
for (int i = 0; i < srcIndexes.size(); ++i) {
for (int j : ZIG_ZAG) {
if ((i + j) < srcIndexes.size()) {
int si = srcIndexes.get(i + j);
assertEquals(name + " destIndexFromSrc(" + si + "):",
destIndexFromSrc(expected, srcLength, destLength, si),
ei2.destinationIndexFromSourceIndex(si));
}
}
}
for (int i = 0; i < destIndexes.size(); ++i) {
for (int j : ZIG_ZAG) {
if ((i + j) < destIndexes.size()) {
int di = destIndexes.get(i + j);
assertEquals(name + " srcIndexFromDest(" + di + "):",
srcIndexFromDest(expected, srcLength, destLength, di),
ei2.sourceIndexFromDestinationIndex(di));
}
}
}
}
/**
 * Offsets added to the outer loop index in checkEditsIter when it maps indexes:
 * for each position i it probes i+0, i+1, i+2, i+3, i+2, i+1, so consecutive
 * lookups move forward and then backward through the index list.
 */
private static final int[] ZIG_ZAG = { 0, 1, 2, 3, 2, 1 };
/**
 * Builds one Edits object from a mix of unchanged spans and replacements, then
 * checks its summary getters, the coarse and fine iterators (with and without
 * unchanged spans, via checkEditsIter), and finally that reset() empties it.
 */
@Test
public void TestEdits() {
Edits edits = new Edits();
// A freshly constructed Edits must report no changes at all.
assertFalse("new Edits hasChanges", edits.hasChanges());
assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
assertEquals("new Edits", 0, edits.lengthDelta());
edits.addUnchanged(1); // multiple unchanged ranges are combined
edits.addUnchanged(10000); // too long, and they are split
// A 0 -> 0 replacement is expected not to count as a change (asserted just below).
edits.addReplace(0, 0);
edits.addUnchanged(2);
// 10003 = 1 + 10000 + 2 unchanged units recorded so far.
assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
assertEquals("unchanged 10003", 0, edits.lengthDelta());
edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed
// Zero-length unchanged span: the expected fine changes below have no entry for it.
edits.addUnchanged(0);
edits.addReplace(2, 1);
edits.addReplace(2, 1);
edits.addReplace(0, 10);
edits.addReplace(100, 0);
edits.addReplace(3000, 4000); // variable-length encoding
edits.addReplace(100000, 100000);
assertTrue("some edits hasChanges", edits.hasChanges());
// 7 = the seven addReplace() calls above that have a non-zero old or new length.
assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
// -3: three 2->1 replacements; +10: the 0->10 insertion; -100: the 100->0 deletion;
// +1000: the 3000->4000 replacement; the 100000->100000 replacement adds nothing.
assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
// Coarse view: one unchanged span, then all adjacent changes as a single change.
// old = 3*2 + 0 + 100 + 3000 + 100000 = 103106
// new = 3*1 + 10 + 0 + 4000 + 100000 = 104013
EditChange[] coarseExpectedChanges = new EditChange[] {
new EditChange(false, 10003, 10003),
new EditChange(true, 103106, 104013)
};
checkEditsIter("coarse",
edits.getCoarseIterator(), edits.getCoarseIterator(),
coarseExpectedChanges, true);
checkEditsIter("coarse changes",
edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
coarseExpectedChanges, false);
// Fine view: every replacement is reported individually, in the order it was added.
EditChange[] fineExpectedChanges = new EditChange[] {
new EditChange(false, 10003, 10003),
new EditChange(true, 2, 1),
new EditChange(true, 2, 1),
new EditChange(true, 2, 1),
new EditChange(true, 0, 10),
new EditChange(true, 100, 0),
new EditChange(true, 3000, 4000),
new EditChange(true, 100000, 100000)
};
checkEditsIter("fine",
edits.getFineIterator(), edits.getFineIterator(),
fineExpectedChanges, true);
checkEditsIter("fine changes",
edits.getFineChangesIterator(), edits.getFineChangesIterator(),
fineExpectedChanges, false);
// After reset() the object must look like a new Edits again.
edits.reset();
assertFalse("reset hasChanges", edits.hasChanges());
assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
assertEquals("reset", 0, edits.lengthDelta());
Edits.Iterator ei = edits.getCoarseChangesIterator();
assertFalse("reset then iterator", ei.next());
}
@Test
public void TestEditsFindFwdBwd() {
    // Index mappings must stay efficient even when the queried indexes are not
    // monotonically increasing. If lookups were slow, the observable symptom
    // of this test would mostly be a very long running time.
    final int pairCount = 200000;
    Edits edits = new Edits();
    int remaining = pairCount;
    while (remaining-- > 0) {
        // Each pair: one unchanged unit followed by a 3 -> 1 replacement.
        edits.addUnchanged(1);
        edits.addReplace(3, 1);
    }
    Edits.Iterator mapper = edits.getFineIterator();
    // Walk the destination indexes upwards ...
    for (int dest = 0; dest <= pairCount; dest += 2) {
        int src = dest * 2;
        assertEquals("ascending", src, mapper.sourceIndexFromDestinationIndex(dest));
        assertEquals("ascending", src + 1, mapper.sourceIndexFromDestinationIndex(dest + 1));
    }
    // ... and then downwards again, querying the higher index of each pair first.
    for (int dest = pairCount; dest >= 0; dest -= 2) {
        int src = dest * 2;
        assertEquals("descending", src + 1, mapper.sourceIndexFromDestinationIndex(dest + 1));
        assertEquals("descending", src, mapper.sourceIndexFromDestinationIndex(dest));
    }
}
/**
 * Checks Edits.mergeAndAppend(ab, bc). Two edit lists are built side by side
 * (ab and bc, which share an intermediate string per the comments below),
 * together with the hand-written merged result expected_ac. The merged output
 * ac is compared with checkEqualEdits, more edits are appended, and finally
 * mismatched intermediate-string lengths must raise IllegalArgumentException.
 */
@Test
public void TestMergeEdits() {
Edits ab = new Edits(), bc = new Edits(), ac = new Edits(), expected_ac = new Edits();
// Simple: Two parallel non-changes.
ab.addUnchanged(2);
bc.addUnchanged(2);
expected_ac.addUnchanged(2);
// Simple: Two aligned changes.
ab.addReplace(3, 2);
bc.addReplace(2, 1);
expected_ac.addReplace(3, 1);
// Unequal non-changes.
ab.addUnchanged(5);
bc.addUnchanged(3);
expected_ac.addUnchanged(3);
// ab ahead by 2
// Overlapping changes accumulate until they share a boundary.
ab.addReplace(4, 3);
bc.addReplace(3, 2);
ab.addReplace(4, 3);
bc.addReplace(3, 2);
ab.addReplace(4, 3);
bc.addReplace(3, 2);
bc.addUnchanged(4);
expected_ac.addReplace(14, 8);
// bc ahead by 2
// Balance out intermediate-string lengths.
ab.addUnchanged(2);
expected_ac.addUnchanged(2);
// Insert something and delete it: Should disappear.
ab.addReplace(0, 5);
ab.addReplace(0, 2);
bc.addReplace(7, 0);
// Parallel change to make a new boundary.
ab.addReplace(1, 2);
bc.addReplace(2, 3);
expected_ac.addReplace(1, 3);
// Multiple ab deletions should remain separate at the boundary.
ab.addReplace(1, 0);
ab.addReplace(2, 0);
ab.addReplace(3, 0);
expected_ac.addReplace(1, 0);
expected_ac.addReplace(2, 0);
expected_ac.addReplace(3, 0);
// Unequal non-changes can be split for another boundary.
ab.addUnchanged(2);
bc.addUnchanged(1);
expected_ac.addUnchanged(1);
// ab ahead by 1
// Multiple bc insertions should create a boundary and remain separate.
bc.addReplace(0, 4);
bc.addReplace(0, 5);
bc.addReplace(0, 6);
expected_ac.addReplace(0, 4);
expected_ac.addReplace(0, 5);
expected_ac.addReplace(0, 6);
// ab ahead by 1
// Multiple ab deletions in the middle of a bc change are merged.
bc.addReplace(2, 2);
// bc ahead by 1
ab.addReplace(1, 0);
ab.addReplace(2, 0);
ab.addReplace(3, 0);
ab.addReplace(4, 1);
expected_ac.addReplace(11, 2);
// Multiple bc insertions in the middle of an ab change are merged.
ab.addReplace(5, 6);
bc.addReplace(3, 3);
// ab ahead by 3
bc.addReplace(0, 4);
bc.addReplace(0, 5);
bc.addReplace(0, 6);
bc.addReplace(3, 7);
expected_ac.addReplace(5, 25);
// Delete around a deletion.
ab.addReplace(4, 4);
ab.addReplace(3, 0);
ab.addUnchanged(2);
bc.addReplace(2, 2);
bc.addReplace(4, 0);
expected_ac.addReplace(9, 2);
// Insert into an insertion.
ab.addReplace(0, 2);
bc.addReplace(1, 1);
bc.addReplace(0, 8);
bc.addUnchanged(4);
expected_ac.addReplace(0, 10);
// bc ahead by 3
// Balance out intermediate-string lengths.
ab.addUnchanged(3);
expected_ac.addUnchanged(3);
// Deletions meet insertions.
// Output order is arbitrary in principle, but we expect insertions first
// and want to keep it that way.
ab.addReplace(2, 0);
ab.addReplace(4, 0);
ab.addReplace(6, 0);
bc.addReplace(0, 1);
bc.addReplace(0, 3);
bc.addReplace(0, 5);
expected_ac.addReplace(0, 1);
expected_ac.addReplace(0, 3);
expected_ac.addReplace(0, 5);
expected_ac.addReplace(2, 0);
expected_ac.addReplace(4, 0);
expected_ac.addReplace(6, 0);
// End with a non-change, so that further edits are never reordered.
ab.addUnchanged(1);
bc.addUnchanged(1);
expected_ac.addUnchanged(1);
// Merge everything built above into the (so far empty) ac and compare.
ac.mergeAndAppend(ab, bc);
checkEqualEdits("ab+bc", expected_ac, ac);
// Append more Edits.
Edits ab2 = new Edits(), bc2 = new Edits();
ab2.addUnchanged(5);
bc2.addReplace(1, 2);
bc2.addUnchanged(4);
expected_ac.addReplace(1, 2);
expected_ac.addUnchanged(4);
ac.mergeAndAppend(ab2, bc2);
checkEqualEdits("ab2+bc2", expected_ac, ac);
// Append empty edits.
Edits empty = new Edits();
ac.mergeAndAppend(empty, empty);
checkEqualEdits("empty+empty", expected_ac, ac);
// Error: Append more edits with mismatched intermediate-string lengths.
// ab2 produces 5 units but mismatch consumes only 1;
// mismatch produces 1 unit but bc2 consumes 1 + 4 = 5.
Edits mismatch = new Edits();
mismatch.addReplace(1, 1);
try {
ac.mergeAndAppend(ab2, mismatch);
fail("ab2+mismatch did not yield IllegalArgumentException");
} catch (IllegalArgumentException expected) {
// Intentionally empty: the exception is the expected outcome.
}
try {
ac.mergeAndAppend(mismatch, bc2);
fail("mismatch+bc2 did not yield IllegalArgumentException");
} catch (IllegalArgumentException expected) {
// Intentionally empty: the exception is the expected outcome.
}
}
/**
 * Runs each CaseMap flavor (lower, upper, title, fold) with omitUnchangedText()
 * into a shared StringBuilder/Edits pair and verifies both the emitted text and
 * the recorded fine-grained edits.
 */
@Test
public void TestCaseMapWithEdits() {
    StringBuilder out = new StringBuilder();
    Edits recorded = new Edits();

    // Lowercasing with the Turkish locale.
    out = CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul", out, recorded);
    assertEquals("toLower(Istanbul)", "ıb", out.toString());
    EditChange[] lowerSpans = {
        new EditChange(true, 1, 1),
        new EditChange(false, 4, 4),
        new EditChange(true, 1, 1),
        new EditChange(false, 2, 2)
    };
    checkEditsIter("toLower(Istanbul)",
            recorded.getFineIterator(), recorded.getFineIterator(),
            lowerSpans, true);

    // Uppercasing with the Greek locale; start from a clean buffer and edit list.
    out.setLength(0);
    recorded.reset();
    out = CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα", out, recorded);
    assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", out.toString());
    EditChange[] upperSpans = {
        new EditChange(false, 1, 1),
        new EditChange(true, 1, 1),
        new EditChange(true, 1, 1),
        new EditChange(true, 1, 1),
        new EditChange(true, 1, 1),
        new EditChange(true, 1, 1)
    };
    checkEditsIter("toUpper(Πατάτα)",
            recorded.getFineIterator(), recorded.getFineIterator(),
            upperSpans, true);

    // Titlecasing with the Dutch locale, without break adjustment or lowercasing.
    out.setLength(0);
    recorded.reset();
    out = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
            DUTCH_LOCALE_, null, "IjssEL IglOo", out, recorded);
    assertEquals("toTitle(IjssEL IglOo)", "J", out.toString());
    EditChange[] titleSpans = {
        new EditChange(false, 1, 1),
        new EditChange(true, 1, 1),
        new EditChange(false, 10, 10)
    };
    checkEditsIter("toTitle(IjssEL IglOo)",
            recorded.getFineIterator(), recorded.getFineIterator(),
            titleSpans, true);

    // Case folding with the Turkic option.
    out.setLength(0);
    recorded.reset();
    out = CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul", out, recorded);
    assertEquals("fold(IßtanBul)", "ıssb", out.toString());
    EditChange[] foldSpans = {
        new EditChange(true, 1, 1),
        new EditChange(true, 1, 2),
        new EditChange(false, 3, 3),
        new EditChange(true, 1, 1),
        new EditChange(false, 2, 2)
    };
    checkEditsIter("fold(IßtanBul)",
            recorded.getFineIterator(), recorded.getFineIterator(),
            foldSpans, true);
}
/**
 * Covers the CaseMap overloads that take a CharSequence and return a String,
 * first with omitUnchangedText() (only changed text is returned) and then
 * without it (the complete mapped string is returned).
 */
@Test
public void TestCaseMapToString() {
    // Only the changed pieces of the text.
    String loweredChanges =
            CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul");
    assertEquals("toLower(Istanbul)", "ıb", loweredChanges);

    String upperedChanges =
            CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα");
    assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", upperedChanges);

    String titledChanges =
            CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
                    DUTCH_LOCALE_, null, "IjssEL IglOo");
    assertEquals("toTitle(IjssEL IglOo)", "J", titledChanges);

    String foldedChanges =
            CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul");
    assertEquals("fold(IßtanBul)", "ıssb", foldedChanges);

    // The complete result string.
    String lowered = CaseMap.toLower().apply(TURKISH_LOCALE_, "IstanBul");
    assertEquals("toLower(Istanbul)", "ıstanbul", lowered);

    String uppered = CaseMap.toUpper().apply(GREEK_LOCALE_, "Πατάτα");
    assertEquals("toUpper(Πατάτα)", "ΠΑΤΑΤΑ", uppered);

    String titled = CaseMap.toTitle().noBreakAdjustment().noLowercase().apply(
            DUTCH_LOCALE_, null, "IjssEL IglOo");
    assertEquals("toTitle(IjssEL IglOo)", "IJssEL IglOo", titled);

    String folded = CaseMap.fold().turkic().apply("IßtanBul");
    assertEquals("fold(IßtanBul)", "ısstanbul", folded);
}
/**
 * Case-folds "abcßDeF" while recording Edits, then checks what each of the four
 * iterator flavors (fine, fine-changes, coarse, coarse-changes) reports:
 * the textual description of every step of next(), and the index mappings
 * between source and destination for every source and destination index.
 */
@Test
public void TestCaseMapEditsIteratorDocs() {
    String input = "abcßDeF";
    StringBuilder sb = new StringBuilder();
    Edits edits = new Edits();
    CaseMap.fold().apply(input, sb, edits);
    // All expectations below assume this output; its length (8) differs from the input's (7).
    assertEquals("fold(" + input + ")", "abcssdef", sb.toString());

    String[] fineIteratorExpected = {
        "{ src[0..3] ≡ dest[0..3] (no-change) }",
        "{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
        "{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
        "{ src[5..6] ≡ dest[6..7] (no-change) }",
        "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
    };
    String[] fineChangesIteratorExpected = {
        "{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
        "{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
        "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
    };
    String[] coarseIteratorExpected = {
        "{ src[0..3] ≡ dest[0..3] (no-change) }",
        "{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
        "{ src[5..6] ≡ dest[6..7] (no-change) }",
        "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
    };
    String[] coarseChangesIteratorExpected = {
        "{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
        "{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
    };

    // Expected destination indices when source index is queried.
    // "Edit" arrays: destinationIndex() after findSourceIndex(srcIndex).
    // "String" arrays: destinationIndexFromSourceIndex(srcIndex).
    int[] expectedDestFineEditIndices = {0, 0, 0, 3, 5, 6, 7};
    int[] expectedDestCoarseEditIndices = {0, 0, 0, 3, 3, 6, 7};
    int[] expectedDestFineStringIndices = {0, 1, 2, 3, 5, 6, 7};
    int[] expectedDestCoarseStringIndices = {0, 1, 2, 3, 6, 6, 7};

    // Expected source indices when destination index is queried.
    // "Edit" arrays: sourceIndex() after findDestinationIndex(destIndex).
    // "String" arrays: sourceIndexFromDestinationIndex(destIndex).
    int[] expectedSrcFineEditIndices = { 0, 0, 0, 3, 3, 4, 5, 6 };
    int[] expectedSrcCoarseEditIndices = { 0, 0, 0, 3, 3, 3, 5, 6 };
    int[] expectedSrcFineStringIndices = { 0, 1, 2, 3, 4, 4, 5, 6 };
    int[] expectedSrcCoarseStringIndices = { 0, 1, 2, 3, 5, 5, 5, 6 };

    // Demonstrate the iterator next() method:
    Edits.Iterator fineIterator = edits.getFineIterator();
    assertIteratorDescriptions(fineIterator, fineIteratorExpected);
    Edits.Iterator fineChangesIterator = edits.getFineChangesIterator();
    assertIteratorDescriptions(fineChangesIterator, fineChangesIteratorExpected);
    Edits.Iterator coarseIterator = edits.getCoarseIterator();
    assertIteratorDescriptions(coarseIterator, coarseIteratorExpected);
    Edits.Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
    assertIteratorDescriptions(coarseChangesIterator, coarseChangesIteratorExpected);

    // Demonstrate the iterator indexing methods:
    // fineIterator should have the same behavior as fineChangesIterator, and
    // coarseIterator should have the same behavior as coarseChangesIterator.
    for (int srcIndex = 0; srcIndex < input.length(); srcIndex++) {
        fineIterator.findSourceIndex(srcIndex);
        fineChangesIterator.findSourceIndex(srcIndex);
        coarseIterator.findSourceIndex(srcIndex);
        coarseChangesIterator.findSourceIndex(srcIndex);

        assertEquals("Source index: " + srcIndex,
                expectedDestFineEditIndices[srcIndex],
                fineIterator.destinationIndex());
        assertEquals("Source index: " + srcIndex,
                expectedDestFineEditIndices[srcIndex],
                fineChangesIterator.destinationIndex());
        assertEquals("Source index: " + srcIndex,
                expectedDestCoarseEditIndices[srcIndex],
                coarseIterator.destinationIndex());
        assertEquals("Source index: " + srcIndex,
                expectedDestCoarseEditIndices[srcIndex],
                coarseChangesIterator.destinationIndex());

        assertEquals("Source index: " + srcIndex,
                expectedDestFineStringIndices[srcIndex],
                fineIterator.destinationIndexFromSourceIndex(srcIndex));
        assertEquals("Source index: " + srcIndex,
                expectedDestFineStringIndices[srcIndex],
                fineChangesIterator.destinationIndexFromSourceIndex(srcIndex));
        assertEquals("Source index: " + srcIndex,
                expectedDestCoarseStringIndices[srcIndex],
                coarseIterator.destinationIndexFromSourceIndex(srcIndex));
        assertEquals("Source index: " + srcIndex,
                expectedDestCoarseStringIndices[srcIndex],
                coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex));
    }

    // Destination indexes range over the OUTPUT string. Bounding this loop by
    // input.length() would leave the last destination index (and the last entry
    // of every expectedSrc* array) unchecked.
    for (int destIndex = 0; destIndex < sb.length(); destIndex++) {
        fineIterator.findDestinationIndex(destIndex);
        fineChangesIterator.findDestinationIndex(destIndex);
        coarseIterator.findDestinationIndex(destIndex);
        coarseChangesIterator.findDestinationIndex(destIndex);

        assertEquals("Destination index: " + destIndex,
                expectedSrcFineEditIndices[destIndex],
                fineIterator.sourceIndex());
        assertEquals("Destination index: " + destIndex,
                expectedSrcFineEditIndices[destIndex],
                fineChangesIterator.sourceIndex());
        assertEquals("Destination index: " + destIndex,
                expectedSrcCoarseEditIndices[destIndex],
                coarseIterator.sourceIndex());
        assertEquals("Destination index: " + destIndex,
                expectedSrcCoarseEditIndices[destIndex],
                coarseChangesIterator.sourceIndex());

        assertEquals("Destination index: " + destIndex,
                expectedSrcFineStringIndices[destIndex],
                fineIterator.sourceIndexFromDestinationIndex(destIndex));
        assertEquals("Destination index: " + destIndex,
                expectedSrcFineStringIndices[destIndex],
                fineChangesIterator.sourceIndexFromDestinationIndex(destIndex));
        assertEquals("Destination index: " + destIndex,
                expectedSrcCoarseStringIndices[destIndex],
                coarseIterator.sourceIndexFromDestinationIndex(destIndex));
        assertEquals("Destination index: " + destIndex,
                expectedSrcCoarseStringIndices[destIndex],
                coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex));
    }
}

/**
 * Advances {@code iterator} with next() until it is exhausted and checks that the
 * tail of each toString() result equals the corresponding expected description,
 * and that exactly {@code expectedDescriptions.length} steps were produced.
 *
 * @param iterator the Edits iterator to drain
 * @param expectedDescriptions expected toString() suffixes, one per step, in order
 */
private void assertIteratorDescriptions(Edits.Iterator iterator, String[] expectedDescriptions) {
    int i = 0;
    while (iterator.next()) {
        String expected = expectedDescriptions[i++];
        String actual = iterator.toString();
        // Only the trailing part of toString() is compared; any prefix is ignored.
        assertEquals("Iteration #" + i, expected,
                actual.substring(actual.length() - expected.length()));
    }
    // Without this check an iterator that stops early would pass unnoticed.
    assertEquals("Number of iterations", expectedDescriptions.length, i);
}
/**
 * Lower- and titlecases three Greek Extended characters with the root locale
 * and compares against fixed expected strings (ticket 13851).
 */
@Test
public void TestCaseMapGreekExtended() {
    // Ticket 13851
    final String greekExtended = "\u1F80\u1F88\u1FFC";

    String lowered = CaseMap.toLower().apply(Locale.ROOT, greekExtended);
    assertEquals("lower", "\u1F80\u1F80\u1FF3", lowered);

    String titled = CaseMap.toTitle().apply(Locale.ROOT, null, greekExtended);
    assertEquals("title", "\u1F88\u1F80\u1FF3", titled);
}
/**
 * Folds two long mixed-script strings and checks only how each result begins:
 * the second character of each input is expected to fold to a longer sequence.
 */
@Test
public void TestFoldBug20316() {
    final String firstInput =
            "廬ᾒ뻪ᣃइ垚Ⴡₓ렞체ꖲ갹ݖ䕷꾬쯎㊅ᦘᰄ㸜䡏遁럢豑黾奯㸀⊻줮끎蒹衤劔뽳趧熶撒쫃窩겨ཇ脌쵐嫑⟑겭㋋濜隣ᳰ봢ℼ櫩靛㉃炔鋳" +
            "оे⳨ᦧྃ깢粣ᑤꇪ찃̹鵄ዤꛛᰙ⡝捣쯋톐蕩栭쥀뎊ᄯ৻恳〬昴껤룩列潱ᑮ煃鶖안꽊鹭宪帐❖ा쥈잔";
    String firstFolded = CaseMap.fold().apply(firstInput);
    assertTrue("廬ᾒ...->廬ἢι...", firstFolded.startsWith("廬ἢι"));

    final String secondInput =
            "儊ẖ깸ᝓ恷ᇁ䜄쌼ꇸჃ䗑䘬䒥㈴槁蛚紆洔㖣믏亝醣黹Ά嶨䖕篕舀ꖧ₭ଯᒗ✧ԗ墖쁳㽎苊澎긁⾆⒞蠻왃囨ᡠ邏꾭⪐턣搤穳≠톲絋砖ሷ⠆" +
            "瞏惢鵶剕듘ᅤ♟Ԡⴠ⊡鹔ጙ갑⣚堟ᣗ✸㕇絮䠎瘗⟡놥擢ꉭ佱ྪ飹痵⿑⨴츿璿僖㯷넴鋰膄釚겼ナ黪差";
    String secondFolded = CaseMap.fold().apply(secondInput);
    assertTrue("儊ẖ...->儊h\u0331...", secondFolded.startsWith("儊h\u0331"));
}
// private data members - test data --------------------------------------
// Fixed java.util.Locale instances shared by the test data and test methods.
// All carry language and country except DUTCH_LOCALE_, which is language-only.
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");
private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");
private static final Locale DUTCH_LOCALE_ = new Locale("nl");
/**
 * Code points checked by TestCharacter; parallel to CHARACTER_LOWER_ (same
 * length, same order). Letter entries are uppercase or titlecase (e.g. U+01C8);
 * the non-letter entries are identical in both arrays.
 */
private static final int CHARACTER_UPPER_[] =
{0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e,
0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f,
0x01c4, 0x01c8, 0x000c, 0x0000};
/**
 * Lowercase counterparts of CHARACTER_UPPER_, index by index.
 */
private static final int CHARACTER_LOWER_[] =
{0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e,
0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f,
0x01c6, 0x01c9, 0x000c, 0x0000};
/*
* CaseFolding.txt says about i and its cousins:
* 0049; C; 0069; # LATIN CAPITAL LETTER I
* 0049; T; 0131; # LATIN CAPITAL LETTER I
*
* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
* That's all.
* See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
*/
// Single-code-point folding data, stored flat as triples (one triple per row).
private static final int FOLDING_SIMPLE_[] = {
// input, default, exclude special i
0x61, 0x61, 0x61,
0x49, 0x69, 0x131,
0x130, 0x130, 0x69,
0x131, 0x131, 0x131,
0xdf, 0xdf, 0xdf,
0xfb03, 0xfb03, 0xfb03,
0x1040e,0x10436,0x10436,
0x5ffff,0x5ffff,0x5ffff
};
// String folding inputs; FOLDING_DEFAULT_ and FOLDING_EXCLUDE_SPECIAL_I_ below
// hold the expected results at the same array index.
private static final String FOLDING_MIXED_[] =
{"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",
"A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"};
// Expected foldings of FOLDING_MIXED_ with default options (U+0130 becomes i + U+0307).
private static final String FOLDING_DEFAULT_[] =
{"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
"ass\u03bcffi\uD801\uDC34i\u0307\u0131"};
// Expected foldings of FOLDING_MIXED_ with the "exclude special i" option
// (U+0130 becomes i, U+0049 becomes U+0131).
private static final String FOLDING_EXCLUDE_SPECIAL_I_[] =
{"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
"ass\u03bcffi\uD801\uDC34i\u0131"};
/**
* "IESUS CHRISTOS"
*/
private static final String SHARED_UPPERCASE_GREEK_ =
"\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";
/**
* "iesus christos"
*/
private static final String SHARED_LOWERCASE_GREEK_ =
"\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";
// "istanbul, not constant\u0131nople!" (contains one dotless i, U+0131)
private static final String SHARED_LOWERCASE_TURKISH_ =
"\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";
// "TOPKAPI PALACE, \u0130STANBUL" (contains one dotted capital I, U+0130)
private static final String SHARED_UPPERCASE_TURKISH_ =
"\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";
private static final String SHARED_UPPERCASE_ISTANBUL_ =
"\u0130STANBUL, NOT CONSTANTINOPLE!";
// Starts with i followed by U+0307 COMBINING DOT ABOVE.
private static final String SHARED_LOWERCASE_ISTANBUL_ =
"i\u0307stanbul, not constantinople!";
private static final String SHARED_LOWERCASE_TOPKAP_ =
"topkap\u0131 palace, istanbul";
private static final String SHARED_UPPERCASE_TOPKAP_ =
"TOPKAPI PALACE, ISTANBUL";
// NOTE: despite the name, this value begins with an uppercase 'S'.
private static final String SHARED_LOWERCASE_GERMAN_ =
"S\u00FC\u00DFmayrstra\u00DFe";
private static final String SHARED_UPPERCASE_GERMAN_ =
"S\u00DCSSMAYRSTRASSE";
// Uppercasing input; UPPER_ROOT_ and UPPER_TURKISH_ differ from each other only
// in the third character (U+0049 vs. U+0130).
private static final String UPPER_BEFORE_ =
"\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";
private static final String UPPER_ROOT_ =
"\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
private static final String UPPER_TURKISH_ =
"\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
// Short input (sharp s + 'a') and its uppercase form "SSA".
private static final String UPPER_MINI_ = "\u00df\u0061";
private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";
// Lowercasing input; LOWER_ROOT_ and LOWER_TURKISH_ differ from each other only
// in the third character (U+0069 vs. U+0131).
private static final String LOWER_BEFORE_ =
"\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";
private static final String LOWER_ROOT_ =
"\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";
private static final String LOWER_TURKISH_ =
"\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";
/**
* Titlecasing test cases stored as one flat array: every five consecutive strings
* form one case, in the order input string, result string, locale ID, break iterator, options.
* (Some cases put the input and the result string on the same source line.)
* the break iterator is specified as an int, same as in BreakIterator.KIND_*:
* 0=KIND_CHARACTER 1=KIND_WORD 2=KIND_LINE 3=KIND_SENTENCE 4=KIND_TITLE -1=default (NULL=words) -2=no breaks (.*)
* options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I L=U_TITLECASE_NO_LOWERCASE A=U_TITLECASE_NO_BREAK_ADJUSTMENT
* The locale ID is the empty string in every case listed here.
* see ICU4C source/test/testdata/casing.txt
*/
private static final String TITLE_DATA_[] = {
"\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
"\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",
"",
"0",
"",
"\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
"\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",
"",
"1",
"",
"\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933
"",
"-1",
"",
" tHe QUIcK bRoWn", " The Quick Brown",
"",
"4",
"",
"\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc",
"\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER
"",
"0",
"",
"\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j
"",
"-1",
"",
"'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'",
"",
"-1",
"",
"a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
"A \u02bbCat. A \u02bbDog! \u02bbEtc.",
"",
"-1",
"", // default
"a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
"A \u02bbcat. A \u02bbdog! \u02bbetc.",
"",
"-1",
"A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
"a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
"A \u02bbCaT. A \u02bbdOg! \u02bbETc.",
"",
"3",
"L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
"\u02bbcAt! \u02bbeTc.",
"\u02bbCat! \u02bbetc.",
"",
"-2",
"", // -2=Trivial break iterator
"\u02bbcAt! \u02bbeTc.",
"\u02bbcat! \u02bbetc.",
"",
"-2",
"A", // U_TITLECASE_NO_BREAK_ADJUSTMENT
"\u02bbcAt! \u02bbeTc.",
"\u02bbCAt! \u02bbeTc.",
"",
"-2",
"L", // U_TITLECASE_NO_LOWERCASE
"\u02bbcAt! \u02bbeTc.",
"\u02bbcAt! \u02bbeTc.",
"",
"-2",
"AL", // Both options
// Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
// when TITLECASE_NO_LOWERCASE encounters a single-letter word
"a b c",
"A B C",
"",
"1",
"L" // U_TITLECASE_NO_LOWERCASE
};
/**
* <p>Flat list of triples: basic string, lower string, upper string.</p>
* <p>There is no title string in this data; the array holds three triples.</p>
*/
private static final String SPECIAL_DATA_[] = {
UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
"ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " +
UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
"ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " +
UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
"AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " +
UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
// sigmas followed/preceded by cased letters
"i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",
"i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",
"I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 "
};
// One entry per SPECIAL_DATA_ triple (three entries for three triples).
// NOTE(review): presumably entry i is the locale used for triple i, and null
// selects a default; the consuming test is not visible here, so confirm.
private static final Locale SPECIAL_LOCALES_[] = {
null,
ENGLISH_LOCALE_,
null,
};
// Capital I / U+0130 combined with U+0307 (dot above), U+0327 and U+0301, followed by
// the lowercase strings named for the Turkish and German locales.
// NOTE(review): the input/expected pairing is inferred from the names; the tests
// that consume these constants are not visible here.
private static final String SPECIAL_DOTTED_ =
"I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";
private static final String SPECIAL_DOTTED_LOWER_TURKISH_ =
"\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";
private static final String SPECIAL_DOTTED_LOWER_GERMAN_ =
"i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";
// Lowercase letters followed by U+0307, and the uppercase strings named for the
// Lithuanian and German locales (the Lithuanian one drops U+0307 after I and after J + U+0327).
private static final String SPECIAL_DOT_ABOVE_ =
"a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";
private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ =
"A\u0307 \u0307 I J\u0327 J\u0301\u0307";
private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =
"A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";
// Uppercase I / J / U+012E with accents, and the lowercase strings named for the
// Lithuanian and German locales (the Lithuanian one inserts U+0307 before each accent).
private static final String SPECIAL_DOT_ABOVE_UPPER_ =
"I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";
private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =
"i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";
private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =
"i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";
// private methods -------------------------------------------------------
/**
 * Parses one data line whose first four fields are each terminated by "; " and
 * contain space-separated hexadecimal code points, for example
 * {@code "00DF; 00DF; 0053 0073; 0053 0053; # comment"}.
 * Each field is decoded into one string. Code points above U+FFFF are stored
 * as surrogate pairs instead of being truncated to 16 bits.
 * If the text after the fourth field contains a " #" comment that does not start
 * immediately, the space-separated words before it (without a trailing ';')
 * are appended as additional array elements.
 *
 * @param str line to break up into Unicode strings
 * @return the four decoded strings followed by any condition words;
 *         the array has no trailing null element
 * @throws StringIndexOutOfBoundsException if fewer than four "; "-terminated
 *         fields are present
 * @throws NumberFormatException if a field token is not a hexadecimal number
 */
private String[] getUnicodeStrings(String str)
{
    List<String> v = new ArrayList<>(10);
    int start = 0;
    for (int casecount = 4; casecount > 0; casecount --) {
        int end = str.indexOf("; ", start);
        String casestr = str.substring(start, end);
        StringBuilder buffer = new StringBuilder();
        int spaceoffset = 0;
        while (spaceoffset < casestr.length()) {
            int nextspace = casestr.indexOf(' ', spaceoffset);
            if (nextspace == -1) {
                nextspace = casestr.length();
            }
            // appendCodePoint keeps supplementary code points intact;
            // a (char) cast would silently drop their upper bits.
            buffer.appendCodePoint(Integer.parseInt(
                    casestr.substring(spaceoffset, nextspace),
                    16));
            spaceoffset = nextspace + 1;
        }
        start = end + 2;
        v.add(buffer.toString());
    }
    // Anything between the fourth field and the " #" comment is a list of conditions.
    int comments = str.indexOf(" #", start);
    if (comments != -1 && comments != start) {
        if (str.charAt(comments - 1) == ';') {
            // Exclude the ';' that terminates the condition list.
            comments --;
        }
        String conditions = str.substring(start, comments);
        int offset = 0;
        while (offset < conditions.length()) {
            int spaceoffset = conditions.indexOf(' ', offset);
            if (spaceoffset == -1) {
                spaceoffset = conditions.length();
            }
            v.add(conditions.substring(offset, spaceoffset));
            offset = spaceoffset + 1;
        }
    }
    return v.toArray(new String[0]);
}
}