blob: 364a547a807428575b1aa6df6720a8bcfb607e68 [file] [log] [blame]
/**
*******************************************************************************
* Copyright (C) 1996-2007, international Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.CollectionUtilities.MultiComparator;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.ULocale;
/** Provides more flexible formatting of UnicodeSet patterns.
*/
public class PrettyPrinter {
private static final UnicodeSet patternWhitespace = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
private static final UnicodeSet sortAtEnd = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
private boolean first = true;
private StringBuffer target = new StringBuffer();
private int firstCodePoint = -2;
private int lastCodePoint = -2;
private boolean compressRanges = true;
private String lastString = "";
private UnicodeSet toQuote = new UnicodeSet(patternWhitespace);
private Transliterator quoter = null;
private Comparator ordering;
private Comparator spaceComp = Collator.getInstance(ULocale.ROOT);
{
setOrdering(Collator.getInstance(ULocale.ROOT));
((RuleBasedCollator)spaceComp).setStrength(RuleBasedCollator.PRIMARY);
}
public Transliterator getQuoter() {
return quoter;
}
public PrettyPrinter setQuoter(Transliterator quoter) {
this.quoter = quoter;
return this; // for chaining
}
public boolean isCompressRanges() {
return compressRanges;
}
/**
* @param compressRanges if you want abcde instead of a-e, make this false
* @return
*/
public PrettyPrinter setCompressRanges(boolean compressRanges) {
this.compressRanges = compressRanges;
return this;
}
public Comparator getOrdering() {
return ordering;
}
/**
* @param ordering the resulting ordering of the list of characters in the pattern
* @return
*/
public PrettyPrinter setOrdering(Comparator ordering) {
this.ordering = new MultiComparator(new Comparator[] {ordering, new UTF16.StringComparator(true,false,0)});
return this;
}
public Comparator getSpaceComparator() {
return spaceComp;
}
/**
* @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
* @return this, for chaining
*/
public PrettyPrinter setSpaceComparator(Comparator spaceComp) {
this.spaceComp = spaceComp;
return this;
}
public UnicodeSet getToQuote() {
return toQuote;
}
/**
* a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
* @param toQuote
*/
public PrettyPrinter setToQuote(UnicodeSet toQuote) {
toQuote = (UnicodeSet)toQuote.clone();
toQuote.addAll(patternWhitespace);
this.toQuote = toQuote;
return this;
}
/**
* Get the pattern for a particular set.
* @param uset
* @return formatted UnicodeSet
*/
public String toPattern(UnicodeSet uset) {
first = true;
UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(sortAtEnd); // remove all the unassigned gorp for now
// make sure that comparison separates all strings, even canonically equivalent ones
Set orderedStrings = new TreeSet(ordering);
for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
if (it.codepoint == UnicodeSetIterator.IS_STRING) {
orderedStrings.add(it.string);
} else {
for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
if (!putAtEnd.contains(i)) {
orderedStrings.add(UTF16.valueOf(i));
}
}
}
}
target.setLength(0);
target.append("[");
for (Iterator it = orderedStrings.iterator(); it.hasNext();) {
appendUnicodeSetItem((String) it.next());
}
for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
appendUnicodeSetItem(it.codepoint);
}
flushLast();
target.append("]");
String sresult = target.toString();
// double check the results. This can be removed once we have more tests.
// try {
// UnicodeSet doubleCheck = new UnicodeSet(sresult);
// if (!uset.equals(doubleCheck)) {
// throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + "\r\n source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + "\r\n result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
// }
// } catch (RuntimeException e) {
// throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
// }
return sresult;
}
private PrettyPrinter appendUnicodeSetItem(String s) {
int cp;
if (UTF16.hasMoreCodePointsThan(s, 1)) {
flushLast();
addSpace(s);
target.append("{");
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
appendQuoted(cp = UTF16.charAt(s, i));
}
target.append("}");
lastString = s;
} else {
appendUnicodeSetItem(UTF16.charAt(s, 0));
}
return this;
}
private void appendUnicodeSetItem(int cp) {
if (!compressRanges)
flushLast();
if (cp == lastCodePoint + 1) {
lastCodePoint = cp; // continue range
} else { // start range
flushLast();
firstCodePoint = lastCodePoint = cp;
}
}
/**
*
*/
private void addSpace(String s) {
if (first) {
first = false;
} else if (spaceComp.compare(s, lastString) != 0) {
target.append(' ');
} else {
int cp = UTF16.charAt(s,0);
int type = UCharacter.getType(cp);
if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
target.append(' ');
} else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
target.append(' '); // make sure we don't accidentally merge two surrogates
}
}
}
private void flushLast() {
if (lastCodePoint >= 0) {
addSpace(UTF16.valueOf(firstCodePoint));
if (firstCodePoint != lastCodePoint) {
appendQuoted(firstCodePoint);
target.append(firstCodePoint + 1 == lastCodePoint ? ' ' : '-');
}
appendQuoted(lastCodePoint);
lastString = UTF16.valueOf(lastCodePoint);
firstCodePoint = lastCodePoint = -2;
}
}
PrettyPrinter appendQuoted(int codePoint) {
if (toQuote.contains(codePoint)) {
if (quoter != null) {
target.append(quoter.transliterate(UTF16.valueOf(codePoint)));
return this;
}
if (codePoint > 0xFFFF) {
target.append("\\U");
target.append(Utility.hex(codePoint,8));
} else {
target.append("\\u");
target.append(Utility.hex(codePoint,4));
}
return this;
}
switch (codePoint) {
case '[': // SET_OPEN:
case ']': // SET_CLOSE:
case '-': // HYPHEN:
case '^': // COMPLEMENT:
case '&': // INTERSECTION:
case '\\': //BACKSLASH:
case '{':
case '}':
case '$':
case ':':
target.append('\\');
break;
default:
// Escape whitespace
if (patternWhitespace.contains(codePoint)) {
target.append('\\');
}
break;
}
UTF16.append(target, codePoint);
return this;
}
// Appender append(String s) {
// target.append(s);
// return this;
// }
// public String toString() {
// return target.toString();
// }
}