// blob: 6c58c060e6706f7eefb665b5ee36b0f7b4f8d2a0 [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2010-2011, Google, International Business Machines *
* Corporation and others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.text;
import java.util.HashSet;
import java.util.Set;
import com.ibm.icu.lang.CharSequences;
/**
 * Simple internal utility class for helping with getSource/TargetSet.
 *
 * <p>The constructor runs every code point from 0 through 0x10FFFF through the supplied
 * transform once and caches which inputs the transform changes. {@link #addSourceTargetSet}
 * then only has to intersect that cache with a filter and transform the surviving items.
 */
class SourceTargetUtility {
    /** The transform whose source and target sets are being computed. */
    final Transform<String, String> transform;

    /**
     * Code points that the transform changes. When a normalizer is supplied it is seeded with
     * {@link #NON_STARTERS} and also receives code points that have an NFC decomposition and
     * that the normalizer does not report as inert. Frozen once the constructor finishes.
     */
    final UnicodeSet sourceCache;

    /** NFC decompositions that the transform changes (only filled when a normalizer is supplied). */
    final Set<String> sourceStrings;

    /** Frozen set matching the pattern {@code [:^ccc=0:]}. */
    static final UnicodeSet NON_STARTERS = new UnicodeSet("[:^ccc=0:]").freeze();

    /** Shared NFC normalizer used to look up decompositions. */
    static final Normalizer2 NFC = Normalizer2.getNFCInstance();

    /**
     * Builds the cache for {@code transform} without any normalizer-related handling.
     *
     * @param transform the transform to analyze
     */
    public SourceTargetUtility(Transform<String, String> transform) {
        this(transform, null);
    }

    /**
     * Builds the cache for {@code transform}.
     *
     * @param transform the transform to analyze
     * @param normalizer if non-null, enables the decomposition handling: the cache is seeded with
     *        {@link #NON_STARTERS}, changed NFC decompositions are recorded in
     *        {@link #sourceStrings}, and {@code normalizer.isInert} decides whether an otherwise
     *        unchanged code point with a decomposition is still added to the cache. May be null.
     */
    public SourceTargetUtility(Transform<String, String> transform, Normalizer2 normalizer) {
        this.transform = transform;
        // The copy constructor yields a modifiable copy of the frozen constant, so the
        // "[:^ccc=0:]" pattern does not have to be parsed again for every instance.
        sourceCache = normalizer != null ? new UnicodeSet(NON_STARTERS) : new UnicodeSet();
        sourceStrings = new HashSet<String>();

        for (int i = 0; i <= 0x10FFFF; ++i) {
            String s = transform.transform(UTF16.valueOf(i));
            boolean added = false;
            if (!CharSequences.equals(i, s)) {
                // The transform changes this single code point.
                sourceCache.add(i);
                added = true;
            }
            if (normalizer == null) {
                continue;
            }
            // NOTE(review): decompositions always come from NFC, not from the supplied
            // normalizer; the normalizer is consulted only for isInert below. Confirm intended.
            String d = NFC.getDecomposition(i);
            if (d == null) {
                continue;
            }
            s = transform.transform(d);
            if (!d.equals(s)) {
                // The transform changes the decomposed form, so remember it as a source string.
                sourceStrings.add(d);
            }
            // isInert is only consulted when the code point was not already added above.
            if (!added && !normalizer.isInert(i)) {
                sourceCache.add(i);
            }
        }
        sourceCache.freeze();
    }

    /**
     * Adds the cached sources that pass the filter, and their transformed results, to the
     * given sets.
     *
     * @param transliterator supplies the filter via {@code getFilterAsUnicodeSet(inputFilter)}
     * @param inputFilter filter passed through to {@code getFilterAsUnicodeSet}
     * @param sourceSet set that receives the affected source characters; modified in place
     * @param targetSet set that receives the transformed output; modified in place
     */
    public void addSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet,
            UnicodeSet targetSet) {
        UnicodeSet myFilter = transliterator.getFilterAsUnicodeSet(inputFilter);

        // sourceCache is frozen, so the intersection is computed on a copy.
        UnicodeSet affectedCharacters = new UnicodeSet(sourceCache).retainAll(myFilter);
        sourceSet.addAll(affectedCharacters);
        for (String s : affectedCharacters) {
            targetSet.addAll(transform.transform(s));
        }

        // Cached decompositions count only when the filter contains the whole string and the
        // transform (re-run here) still changes it.
        for (String s : sourceStrings) {
            if (myFilter.containsAll(s)) {
                String t = transform.transform(s);
                if (!s.equals(t)) {
                    targetSet.addAll(t);
                    sourceSet.addAll(s);
                }
            }
        }
    }
}