blob: bb8c63f004a1e92d66b97b70f776831a21969f79 [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2002, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java,v $
* $Date: 2003/11/21 19:10:43 $
* $Revision: 1.2 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import com.ibm.icu.text.*;
import com.ibm.icu.lang.*;
import com.ibm.icu.util.*;
import com.ibm.icu.impl.*;
import java.io.*;
import java.util.*;
//import java.util.regex.*;
import java.text.MessageFormat;
public class BagFormatter {
/**
 * When true, the end name of a printed range is abbreviated relative to the
 * start name (via getAbbreviatedName with "~" as the substitute).
 */
boolean abbreviated = false;
/**
 * Compares two UnicodeSets and returns the report as a string.
 * The report is produced by the PrintWriter-based overload and captured
 * in an in-memory buffer.
 * @param name1 name of first set to be compared
 * @param set1 first set
 * @param name2 name of second set to be compared
 * @param set2 second set
 * @return formatted string
 */
public String showSetDifferences(
    String name1,
    UnicodeSet set1,
    String name2,
    UnicodeSet set2) {
    StringWriter buffer = new StringWriter();
    PrintWriter writer = new PrintWriter(buffer);
    showSetDifferences(writer, name1, set1, name2, set2);
    // Flushing the PrintWriter also flushes the StringWriter underneath it.
    writer.flush();
    return buffer.toString();
}
/**
 * Compares two collections and returns the report as a string.
 * The report is produced by the PrintWriter-based overload and captured
 * in an in-memory buffer.
 * @param name1 name of first collection to be compared
 * @param set1 first collection
 * @param name2 name of second collection to be compared
 * @param set2 second collection
 * @return formatted string
 */
public String showSetDifferences(
    String name1,
    Collection set1,
    String name2,
    Collection set2) {
    StringWriter buffer = new StringWriter();
    PrintWriter writer = new PrintWriter(buffer);
    showSetDifferences(writer, name1, set1, name2, set2);
    // Flushing the PrintWriter also flushes the StringWriter underneath it.
    writer.flush();
    return buffer.toString();
}
/**
 * Compares two UnicodeSets and prints the differences to the given writer.
 * Three sections are written, each preceded by a blank line:
 * items only in the first set, items only in the second set, and items
 * in both.
 * @param pw destination for the report
 * @param name1 name of first set to be compared
 * @param set1 first set
 * @param name2 name of second set to be compared
 * @param set2 second set
 */
public void showSetDifferences(
    PrintWriter pw,
    String name1,
    UnicodeSet set1,
    String name2,
    UnicodeSet set2) {
    final String[] names = new String[] { name1, name2 };

    // In set1 but not in set2.
    UnicodeSet onlyInFirst = new UnicodeSet(set1).removeAll(set2);
    pw.println();
    showSetNames(pw, inOut.format(names), onlyInFirst);

    // In set2 but not in set1.
    UnicodeSet onlyInSecond = new UnicodeSet(set2).removeAll(set1);
    pw.println();
    showSetNames(pw, outIn.format(names), onlyInSecond);

    // In both sets.
    UnicodeSet inBoth = new UnicodeSet(set2).retainAll(set1);
    pw.println();
    showSetNames(pw, inIn.format(names), inBoth);
}
/**
 * Compares two collections and prints the differences to the given writer.
 * Three sections are written, each preceded by a blank line:
 * items only in the first collection, items only in the second, and items
 * in both.
 * @param pw destination for the report
 * @param name1 name of first collection to be compared
 * @param set1 first collection
 * @param name2 name of second collection to be compared
 * @param set2 second collection
 */
public void showSetDifferences(
    PrintWriter pw,
    String name1,
    Collection set1,
    String name2,
    Collection set2) {
    final String[] names = new String[] { name1, name2 };

    // Collection offers no clone(), so a HashSet serves as the scratch copy.
    // As a consequence the original order and any duplicates may be lost.
    final Collection scratch = new HashSet(set1);

    // In set1 but not in set2.
    scratch.removeAll(set2);
    pw.println();
    showSetNames(pw, inOut.format(names), scratch);

    // In set2 but not in set1.
    scratch.clear();
    scratch.addAll(set2);
    scratch.removeAll(set1);
    pw.println();
    showSetNames(pw, outIn.format(names), scratch);

    // In both.
    scratch.clear();
    scratch.addAll(set1);
    scratch.retainAll(set2);
    pw.println();
    showSetNames(pw, inIn.format(names), scratch);
}
/**
 * Formats the title and the contents of the given object into a string,
 * using the PrintWriter-based overload with an in-memory buffer.
 * @param title heading printed on the first line
 * @param set1 object whose contents are listed
 * @return the formatted text
 */
public String showSetNames(String title, Object set1) {
    StringWriter buffer = new StringWriter();
    PrintWriter writer = new PrintWriter(buffer);
    showSetNames(writer, title, set1);
    writer.flush();
    final String text = buffer.toString();
    writer.close();
    return text;
}
/**
 * Prints the title on its own line, then hands the object to the main
 * visitor, which writes its contents to the same writer.
 * @param output destination for the listing
 * @param title heading printed on the first line
 * @param c object whose contents are listed
 * @internal
 */
public void showSetNames(PrintWriter output, String title, Object c) {
    output.println(title);
    // The visitor prints through its own field, so point it at this writer.
    this.mainVisitor.output = output;
    this.mainVisitor.doAt(c);
}
/**
 * Writes the title and the contents of the given object to a file,
 * encoded as UTF-8. The file is created or overwritten, and is always
 * closed before this method returns.
 * @param filename path of the file to write
 * @param title heading printed on the first line
 * @param c object whose contents are listed
 * @throws IOException if the file cannot be opened for writing
 * @internal
 */
public void showSetNames(String filename, String title, Object c) throws IOException {
    FileOutputStream stream = new FileOutputStream(filename);
    PrintWriter pw = null;
    try {
        pw = new PrintWriter(new OutputStreamWriter(stream, "utf-8"));
        showSetNames(pw, title, c);
    } finally {
        // Closing the PrintWriter flushes it and closes the stream beneath it;
        // if the writer was never created, close the raw stream directly.
        if (pw != null) {
            pw.close();
        } else {
            stream.close();
        }
    }
}
/**
 * Builds an abbreviated form of source relative to pattern.
 * Both strings are split at the point returned by
 * NameIterator.findMatchingEnd; within each part, every word of source
 * that equals the word at the same position in pattern is replaced by
 * substitute.
 * @param source the name to abbreviate
 * @param pattern the name to compare against
 * @param substitute text used in place of each matching word
 * @return the abbreviated name
 */
public String getAbbreviatedName(
    String source,
    String pattern,
    String substitute) {
    final int sharedTail = NameIterator.findMatchingEnd(source, pattern);
    final int sourceSplit = source.length() - sharedTail;
    final int patternSplit = pattern.length() - sharedTail;
    final StringBuffer out = new StringBuffer();

    // Leading part: everything before the shared tail.
    final String sourceHead = source.substring(0, sourceSplit);
    final String patternHead = pattern.substring(0, patternSplit);
    addMatching(sourceHead, patternHead, substitute, out);

    // Trailing part: the shared tail itself.
    final String sourceTail = source.substring(sourceSplit);
    final String patternTail = pattern.substring(patternSplit);
    addMatching(sourceTail, patternTail, substitute, out);

    return out.toString();
}
/**
 * Pluggable callback that produces a string from a pair of strings.
 * NOTE(review): presumably describes how b relates to a in the output;
 * no live caller is visible in this file, so confirm against users.
 */
public abstract static class Relation {
    /**
     * @param a first string
     * @param b second string
     * @return the relation text for the pair
     */
    public abstract String getRelation(String a, String b);
}
/**
 * Default Relation: yields the empty string for every pair.
 */
static class NullRelation extends Relation {
    public String getRelation(String a, String b) {
        return "";
    }
}
/** Current relation callback; starts as a NullRelation, which always yields "". */
private Relation r = new NullRelation();
/**
 * Replaces the relation callback.
 * @param r the new relation
 * @return this formatter, so calls can be chained
 */
public BagFormatter setRelation(Relation r) {
    this.r = r;
    return this;
}
/**
 * @return the relation callback currently in use
 */
public Relation getRelation() {
    return this.r;
}
/*
r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
*/
/*
static final UnicodeSet NO_NAME =
new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
static final UnicodeSet NAME_CHARACTERS =
new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");
public UnicodeSet getSetForName(String namePattern) {
UnicodeSet result = new UnicodeSet();
Matcher m = Pattern.compile(namePattern).matcher("");
// check for no-name items, and add in bulk
m.reset("<no name>");
if (m.matches()) {
result.addAll(NO_NAME);
}
// check all others
UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
while (usi.next()) {
String name = getName(usi.codepoint);
if (name == null)
continue;
m.reset(name);
if (m.matches()) {
result.add(usi.codepoint);
}
}
// Note: if Regex had some API so that if we could tell that
// an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
// then we could optimize by skipping whole swathes of characters
return result;
}
*/
/**
 * Chooses between one output line per run of code points sharing a label
 * (true) and one output line per code point (false).
 * @param in true to merge ranges
 */
public void setMergeRanges(boolean in) {
    this.mergeRanges = in;
}
/**
 * Controls whether a UnicodeSet container is itself printed (as a "# "
 * comment line) before its contents are listed.
 * @param b true to print the set as well
 */
public void setShowSetAlso(boolean b) {
    this.showSetAlso = b;
}
/**
 * Returns a printable name for a code point, prefixed with "U+" and its
 * hex value. The Name property is used when present. Otherwise, for code
 * points whose General_Category value is "Control", the Unicode_1_Name is
 * used in angle brackets. Anything else is reported as "&lt;reserved&gt;".
 * @param codePoint the code point to describe
 * @return the prefixed name; never null
 */
public String getName(int codePoint) {
    String hcp = "U+" + Utility.hex(codePoint, 4) + " ";
    String result = nameProp.getPropertyValue(codePoint);
    if (result != null) {
        return hcp + result;
    }
    // No Name value. Controls have none, so fall back to the Unicode 1 name.
    // (The previous code asked nameProp again here, which had just returned
    // null, so this branch could never produce a result.)
    String prop = catProp.getPropertyValue(codePoint);
    if ("Control".equals(prop)) {
        result = name1Prop.getPropertyValue(codePoint);
        if (result != null) {
            return hcp + "<" + result + ">";
        }
    }
    return hcp + "<reserved>";
}
// Property sources. All except labelSource are assigned together in
// setUnicodePropertySource; labelSource is assigned by setLabelSource.

/** The source passed to setUnicodePropertySource; the others below are clones of it. */
UnicodePropertySource source;
/** Supplies the label printed after "# " on each output line. */
UnicodePropertySource labelSource;
/** Clone of source configured for the "Name" property. */
UnicodePropertySource nameProp;
/** Clone of source configured for the "Unicode_1_Name" property. */
UnicodePropertySource name1Prop;
/** Clone of source configured for the "General_Category" property. */
UnicodePropertySource catProp;
/** Clone of source configured for "General_Category" with the SHORT name choice. */
UnicodePropertySource shortCatProp;
/**
 * Installs a new property source and derives the per-property sources
 * (Name, Unicode_1_Name, General_Category long and short) from clones of it.
 * The label source is not touched.
 * @param source the property source to clone from
 */
public void setUnicodePropertySource(UnicodePropertySource source) {
    this.source = source;

    UnicodePropertySource copy = (UnicodePropertySource) source.clone();
    this.nameProp = copy.setPropertyAlias("Name");

    copy = (UnicodePropertySource) source.clone();
    this.name1Prop = copy.setPropertyAlias("Unicode_1_Name");

    copy = (UnicodePropertySource) source.clone();
    this.catProp = copy.setPropertyAlias("General_Category");

    copy = (UnicodePropertySource) source.clone();
    this.shortCatProp = copy
        .setPropertyAlias("General_Category")
        .setNameChoice(UProperty.NameChoice.SHORT);
}
// Instance initializer: installs the default ICU property source and a
// default label source based on the short General_Category value, with
// Lo, Lu and Lt all mapped to "L&" through a MapFilter.
{
    setUnicodePropertySource(new UnicodePropertySource.ICU());

    final String[] letterCategories = { "Lo", "Lu", "Lt" };
    Map labelMap = new HashMap();
    for (int i = 0; i < letterCategories.length; ++i) {
        labelMap.put(letterCategories[i], "L&");
    }

    setLabelSource(
        new UnicodePropertySource.ICU()
            .setPropertyAlias("General_Category")
            .setNameChoice(UProperty.NameChoice.SHORT)
            .setFilter(new UnicodePropertySource.MapFilter().setMap(labelMap)));
}
// ===== PRIVATES =====
// Only referenced from the commented-out getLabels() below.
private Visitor.Join labelVisitor = new Visitor.Join();
// true: one output line per run of code points sharing a label; false: one line per code point.
private boolean mergeRanges = true;
// When true, the literal character(s) are included in each line for code points >= 0x20.
// No setter for this flag is visible in this file.
private boolean literalCharacter = false;
// When true, a UnicodeSet container is printed as a "# " line before its contents.
private boolean showSetAlso = false;
// Splits a code point range into runs that share the same label.
private RangeFinder rf = new RangeFinder();
// Section titles used by showSetDifferences; {0} and {1} are the two set names.
private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");
// Visitor that does the actual printing; its output field is set by showSetNames.
private MyVisitor mainVisitor = new MyVisitor();
/*
private String getLabels(int start, int end) {
Set names = new TreeSet();
for (int cp = start; cp <= end; ++cp) {
names.add(getLabel(cp));
}
return labelVisitor.join(names);
}
*/
/**
 * Appends the words of source to result, separated by single spaces.
 * A word that equals the word at the same position in pattern is replaced
 * by substitute. Once pattern runs out of words, source words are copied
 * unchanged.
 * @param source text whose words are appended
 * @param pattern text whose words are compared position by position
 * @param substitute replacement for each matching word
 * @param result buffer that receives the output
 */
private void addMatching(
    String source,
    String pattern,
    String substitute,
    StringBuffer result) {
    NameIterator sourceWords = new NameIterator(source);
    NameIterator patternWords = new NameIterator(pattern);
    boolean needSeparator = false;
    for (String word = sourceWords.next(); word != null; word = sourceWords.next()) {
        // Advance the pattern in step with the source; this may yield null.
        String patternWord = patternWords.next();
        if (needSeparator) {
            result.append(" ");
        }
        needSeparator = true;
        result.append(word.equals(patternWord) ? substitute : word);
    }
}
// Column layout for lines that describe a single code point (used when
// mergeRanges is false).
// NOTE(review): the array looks like (position, alignment) pairs — confirm against Tabber.MonoTabber.
private Tabber singleTabber =
new Tabber.MonoTabber(
new int[] {
0,
Tabber.LEFT,
6,
Tabber.LEFT,
10,
Tabber.LEFT,
14,
Tabber.LEFT });
// Column layout for lines that describe a range of code points (used when
// mergeRanges is true).
// NOTE(review): same presumed (position, alignment) pair format as above — confirm.
private Tabber rangeTabber =
new Tabber.MonoTabber(
new int[] {
0,
Tabber.LEFT,
14,
Tabber.LEFT,
18,
Tabber.LEFT,
27,
Tabber.LEFT,
34,
Tabber.LEFT });
// Integer formatter (English locale) for the item counts printed in the output.
private static NumberFormat nf =
NumberFormat.getIntegerInstance(Locale.ENGLISH);
/**
 * Visitor that prints containers and code point ranges to {@link #output}.
 * Each code point (or run of code points sharing a label) becomes one
 * tab-aligned line; each container is followed by a "# Total:" line.
 */
private class MyVisitor extends Visitor {
    /** Destination of everything this visitor prints; must be set before visiting. */
    PrintWriter output;

    /**
     * Visits o and returns what was printed as a string.
     * The output field is redirected to an in-memory writer for the
     * duration of the call and restored afterwards. Previously the text
     * went to whatever output happened to be, and "" was returned.
     * @param o object to format
     * @return the printed text
     */
    public String format(Object o) {
        StringWriter sw = new StringWriter();
        PrintWriter pw = new PrintWriter(sw);
        PrintWriter saved = output;
        output = pw;
        try {
            doAt(o);
        } finally {
            output = saved;
        }
        pw.flush();
        String result = sw.getBuffer().toString();
        pw.close();
        return result;
    }

    /** Prints the container itself as a "# " line when it is a UnicodeSet and showSetAlso is on. */
    protected void doBefore(Object container, Object o) {
        if (showSetAlso && container instanceof UnicodeSet) {
            output.println("# " + container);
        }
    }

    /** Nothing is printed between items. */
    protected void doBetween(Object container, Object lastItem, Object nextItem) {
    }

    /** Prints the item count of the container. */
    protected void doAfter(Object container, Object o) {
        output.println("# Total: " + nf.format(count(container)));
    }

    /**
     * Prints a leaf item: map entries as key, "->", value; code point
     * ranges through doAt(CodePointRange); anything else via toString().
     */
    protected void doSimpleAt(Object o) {
        if (o instanceof Map.Entry) {
            Map.Entry oo = (Map.Entry) o;
            Object key = oo.getKey();
            Object value = oo.getValue();
            doBefore(o, key);
            doAt(key);
            output.print("->");
            doAt(value);
            doAfter(o, value);
        } else if (o instanceof Visitor.CodePointRange) {
            doAt((Visitor.CodePointRange) o);
        } else {
            output.print(o.toString());
        }
    }

    /**
     * Prints a code point range, either one line per code point or, when
     * mergeRanges is on, one line per run of code points with the same label.
     */
    protected void doAt(Visitor.CodePointRange usi) {
        if (!mergeRanges) {
            for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
                printSingle(cp);
            }
        } else {
            // RangeFinder takes an exclusive limit, hence the + 1.
            rf.reset(usi.codepoint, usi.codepointEnd + 1);
            String label;
            while ((label = rf.next()) != null) {
                printRun(label, rf.start, rf.limit - 1);
            }
        }
    }

    /** Prints the line for one code point: hex, label, optional literal, name. */
    private void printSingle(int cp) {
        String label = labelSource.getPropertyValue(cp);
        if (label.length() != 0) {
            label += " ";
        }
        // The literal character is only shown for code points >= 0x20.
        String literal = (literalCharacter && (cp >= 0x20))
            ? " \t(" + UTF16.valueOf(cp) + ") "
            : "";
        output.println(
            singleTabber.process(
                Utility.hex(cp, 4)
                    + " \t# "
                    + label
                    + literal
                    + " \t"
                    + getName(cp)));
    }

    /** Prints the line for the run start..end (inclusive), all sharing label. */
    private void printRun(String label, int start, int end) {
        boolean isRange = start != end;
        if (label.length() != 0) {
            label += " ";
        }
        String codes = Utility.hex(start, 4)
            + (isRange ? (".." + Utility.hex(end, 4)) : "");
        String count = " \t[" + nf.format(end - start + 1) + "]";
        // The literal characters are only shown when the run starts at >= 0x20.
        String literal = "";
        if (literalCharacter && (start >= 0x20)) {
            literal = " \t("
                + UTF16.valueOf(start)
                + (isRange ? (".." + UTF16.valueOf(end)) : "")
                + ") ";
        }
        String names = getName(start);
        if (isRange) {
            names += ".."
                + (abbreviated
                    ? getAbbreviatedName(getName(end), getName(start), "~")
                    : getName(end));
        }
        output.println(
            rangeTabber.process(
                codes + " \t# " + label + count + literal + " \t" + names));
    }
}
/**
 * Iterates through a string, breaking at single space characters.
 * @author Davis
 */
private static class NameIterator {
    String source;
    int position;
    int start;
    int limit;

    NameIterator(String source) {
        this.source = source;
        this.start = 0;
        this.limit = source.length();
    }

    /**
     * Finds the next word. The returned word does not include the space
     * that ends it; that space is skipped.
     * @return the next word, or null when the end of the string is reached
     */
    String next() {
        if (position >= limit) {
            return null;
        }
        int pos = source.indexOf(' ', position);
        if (pos < 0 || pos >= limit) {
            pos = limit;
        }
        String result = source.substring(position, pos);
        position = pos + 1;
        return result;
    }

    /**
     * Measures the tail of s1 that also ends s2, trimmed so that it begins
     * at a space. The common suffix of the two strings is found first;
     * the result is the length of s1 from the first space at or after the
     * start of that suffix.
     * @param s1 first string
     * @param s2 second string
     * @return the number of trailing characters of s1 in the trimmed common
     *         tail, or 0 if there is no space in the common suffix
     */
    static int findMatchingEnd(String s1, String s2) {
        int i = s1.length();
        int j = s2.length();
        // Walk backwards while both strings still have characters and they agree.
        // Explicit bounds checks replace the old catch-all exception handler.
        while (i > 0 && j > 0 && s1.charAt(i - 1) == s2.charAt(j - 1)) {
            --i;
            --j;
        }
        // i is now the start of the common suffix in s1; move forward to a space.
        int space = s1.indexOf(' ', i);
        if (space < 0) {
            return 0;
        }
        return s1.length() - space;
    }
}
/**
 * Splits a span of code points into consecutive runs whose label (as given
 * by labelSource) is the same. After each successful next(), start is the
 * first code point of the run and limit is one past its last.
 */
private class RangeFinder {
    int start, limit;
    private int veryLimit;

    /**
     * Prepares to scan from start (inclusive) up to end (exclusive).
     */
    void reset(int start, int end) {
        this.limit = start;
        this.veryLimit = end;
    }

    /**
     * Advances to the next run.
     * @return the label shared by the run, or null when the span is exhausted
     */
    String next() {
        if (limit >= veryLimit) {
            return null;
        }
        // The new run starts where the previous one stopped.
        start = limit;
        limit = start + 1;
        final String label = labelSource.getPropertyValue(start);
        // Extend the run while the following code points carry the same label.
        while (limit < veryLimit
            && labelSource.getPropertyValue(limit).equals(label)) {
            ++limit;
        }
        return label;
    }
}
/**
 * @return true if the end name of a printed range is abbreviated
 */
public boolean isAbbreviated() {
    return this.abbreviated;
}
/**
 * Controls whether the end name of a printed range is abbreviated
 * relative to the start name.
 * @param b true to abbreviate
 */
public void setAbbreviated(boolean b) {
    this.abbreviated = b;
}
/**
 * @return the property source last passed to setUnicodePropertySource
 */
public UnicodePropertySource getSource() {
    return this.source;
}
/**
 * @return the property source that supplies the label on each output line
 */
public UnicodePropertySource getLabelSource() {
    return this.labelSource;
}
/**
 * Replaces the property source that supplies the label on each output line.
 * Only the label source changes; the main property source is left alone.
 * @param source the new label source
 */
public void setLabelSource(UnicodePropertySource source) {
    this.labelSource = source;
}
/**
 * Adds the contents of a UnicodeSet to a collection by delegating to
 * UnicodeSet.addAllTo.
 * @param source set whose contents are copied
 * @param target collection that receives them
 * @deprecated call source.addAllTo(target) directly
 */
public static void addAll(UnicodeSet source, Collection target) {
    source.addAllTo(target);
}
// UTILITIES
// Transliterator built from a filtered "hex" ID. The filter selects every
// character outside U+0021..U+007E and U+00A0..U+00FF.
// NOTE(review): presumably converts the selected characters to hex escapes — confirm against the ICU "hex" transform.
public static final Transliterator hex = Transliterator.getInstance(
"[^\\u0021-\\u007E\\u00A0-\\u00FF] hex");
/**
 * Minimal sink for progress messages, one line at a time.
 */
public interface Shower {
    /**
     * Shows one line of text.
     * @param arg the line to show
     */
    void println(String arg);
}
/** Shower that writes each line to standard output. */
public static Shower CONSOLE = new Shower() {
    public void println(String line) {
        System.out.println(line);
    }
};
/**
 * Opens a file for reading as UTF-8 text, with a 4 KB buffer.
 * The path is the plain concatenation of dir and filename, so dir must
 * already end with a separator if one is needed.
 * @param dir directory prefix
 * @param filename file name appended to dir
 * @param shower if not null, receives a message with the canonical path
 * @return a buffered reader over the file; the caller must close it
 * @throws IOException if the file cannot be opened
 */
public static BufferedReader openUTF8Reader(String dir, String filename, Shower shower) throws IOException {
    File file = new File(dir + filename);
    if (shower != null) {
        // This method reads an existing file, so report it as opened, not created.
        shower.println("Opening File: "
            + file.getCanonicalPath());
    }
    return new BufferedReader(
        new InputStreamReader(
            new FileInputStream(file),
            "UTF-8"),
        4*1024);
}
/**
 * Opens a file for writing as UTF-8 text, with a 4 KB buffer.
 * The path is the plain concatenation of dir and filename. Parent
 * directories are not created.
 * @param dir directory prefix
 * @param filename file name appended to dir
 * @param shower if not null, receives a message with the canonical path
 * @return a print writer over the file; the caller must close it
 * @throws IOException if the file cannot be opened
 */
public static PrintWriter openUTF8Writer(String dir, String filename, Shower shower) throws IOException {
    final File target = new File(dir + filename);
    if (shower != null) {
        shower.println("Creating File: " + target.getCanonicalPath());
    }
    final OutputStream raw = new FileOutputStream(target);
    final Writer utf8 = new OutputStreamWriter(raw, "UTF-8");
    return new PrintWriter(new BufferedWriter(utf8, 4 * 1024));
}
}