src/com/ibm/icu/dev/test/util/BagFormatter.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 2002-2004, International Business Machines Corporation and    *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  */
 package com.ibm.icu.dev.test.util;

 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.text.MessageFormat;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Locale;
 import java.util.Map;

 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.text.Transliterator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;

 public class BagFormatter {
     static final boolean DEBUG = false;
     public static final boolean SHOW_FILES = System.getProperty("SHOW_FILES") != null;

     private static final String BASE_RULES =
 	  	"'<' > '&lt;' ;" +
 	    "'<' < '&'[lL][Tt]';' ;" +
 	    "'&' > '&amp;' ;" +
 	    "'&' < '&'[aA][mM][pP]';' ;" +
 	    "'>' < '&'[gG][tT]';' ;" +
 	    "'\"' < '&'[qQ][uU][oO][tT]';' ; " +
 	    "'' < '&'[aA][pP][oO][sS]';' ; ";

     private static final String CONTENT_RULES =
         "'>' > '&gt;' ;";

     private static final String HTML_RULES = BASE_RULES + CONTENT_RULES +
 	    "'\"' > '&quot;' ; ";

     private static final String XML_RULES = HTML_RULES +
 	    "'' > '&apos;' ; ";

     /*
 The ampersand character (&) and the left angle bracket (<) MUST NOT appear

 in their literal form, except when used as markup delimiters, or within a

 comment, a processing instruction, or a CDATA section. If they are needed

 elsewhere, they MUST be escaped using either numeric character references or

 the strings "&amp;" and "&lt;" respectively. The right angle bracket (>) MAY

 be represented using the string "&gt;", and MUST, for compatibility, be

 escaped using either "&gt;" or a character reference when it appears in the string

 "]]>" in content, when that string is not marking the end of a CDATA section.

 In the content of elements, character data is any string of characters which does

 not contain the start-delimiter of any markup and does not include the

 CDATA-section-close delimiter, "]]>". In a CDATA section, character data is

 any string of characters not including the CDATA-section-close delimiter,

 "]]>".

 To allow attribute values to contain both single and double quotes, the

 apostrophe or single-quote character (') MAY be represented as "&apos;", and

 the double-quote character (") as "&quot;".


      */

     public static final Transliterator toXML = Transliterator.createFromRules(
             "any-xml", XML_RULES, Transliterator.FORWARD);
     public static final Transliterator fromXML = Transliterator.createFromRules(
             "xml-any", XML_RULES, Transliterator.REVERSE);

     public static final Transliterator toHTML = Transliterator.createFromRules(
             "any-html", HTML_RULES, Transliterator.FORWARD);
     public static final Transliterator fromHTML = Transliterator.createFromRules(
             "html-any", HTML_RULES, Transliterator.REVERSE);

     public static final PrintWriter CONSOLE = new PrintWriter(System.out,true);

     private static PrintWriter log = CONSOLE;

     private boolean abbreviated = false;
     private String separator = ",";
     private String prefix = "[";
     private String suffix = "]";
     private UnicodeProperty.Factory source;
     private UnicodeLabel nameSource;
     private UnicodeLabel labelSource;
     private UnicodeLabel rangeBreakSource;
     private UnicodeLabel valueSource;
     private String propName = "";
     private boolean showCount = true;
     private boolean skipNullValues = true;
     //private boolean suppressReserved = true;
     private boolean hexValue = false;
     private static final String NULL_VALUE = "_NULL_VALUE_";
     private int fullTotal = -1;
     private String lineSeparator = "\r\n";

     /**
      * Compare two UnicodeSets, and show the differences
      * @param name1 name of first set to be compared
      * @param set1 first set
      * @param name2 name of second set to be compared
      * @param set2 second set
      * @return formatted string
      */
     public String showSetDifferences(
         String name1,
         UnicodeSet set1,
         String name2,
         UnicodeSet set2) {

         StringWriter result = new StringWriter();
         showSetDifferences(new PrintWriter(result),name1,set1,name2,set2);
         result.flush();
         return result.getBuffer().toString();
     }

     public String showSetDifferences(
         String name1,
         Collection set1,
         String name2,
         Collection set2) {

         StringWriter result = new StringWriter();
         showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);
         result.flush();
         return result.getBuffer().toString();
     }

     /**
      * Compare two UnicodeSets, and show the differences
      * @param name1 name of first set to be compared
      * @param set1 first set
      * @param name2 name of second set to be compared
      * @param set2 second set
      * @return formatted string
      */
     public void showSetDifferences(
         PrintWriter pw,
         String name1,
         UnicodeSet set1,
         String name2,
         UnicodeSet set2) {
         if (pw == null) pw = CONSOLE;
         String[] names = { name1, name2 };

         UnicodeSet temp = new UnicodeSet(set1).removeAll(set2);
         pw.println();
         pw.println(inOut.format(names));
         showSetNames(pw, temp);

         temp = new UnicodeSet(set2).removeAll(set1);
         pw.println();
         pw.println(outIn.format(names));
         showSetNames(pw, temp);

         temp = new UnicodeSet(set2).retainAll(set1);
         pw.println();
         pw.println(inIn.format(names));
         showSetNames(pw, temp);
         pw.flush();
     }

     public void showSetDifferences(
         PrintWriter pw,
         String name1,
         Collection set1,
         String name2,
         Collection set2) {

         if (pw == null) pw = CONSOLE;
         String[] names = { name1, name2 };
         // damn'd collection doesn't have a clone, so
         // we go with Set, even though that
         // may not preserve order and duplicates
         Collection temp = new HashSet(set1);
         temp.removeAll(set2);
         pw.println();
         pw.println(inOut.format(names));
         showSetNames(pw, temp);

         temp.clear();
         temp.addAll(set2);
         temp.removeAll(set1);
         pw.println();
         pw.println(outIn.format(names));
         showSetNames(pw, temp);

         temp.clear();
         temp.addAll(set1);
         temp.retainAll(set2);
         pw.println();
         pw.println(inIn.format(names));
         showSetNames(pw, temp);
     }

     /**
      * Returns a list of items in the collection, with each separated by the separator.
      * Each item must not be null; its toString() is called for a printable representation
      * @param c source collection
      * @return a String representation of the list
      * @internal
      */
     public String showSetNames(Object c) {
         StringWriter buffer = new StringWriter();
         PrintWriter output = new PrintWriter(buffer);
         showSetNames(output,c);
         return buffer.toString();
     }

     /**
      * Returns a list of items in the collection, with each separated by the separator.
      * Each item must not be null; its toString() is called for a printable representation
      * @param output destination to which to write names
      * @param c source collection
      * @internal
      */
     public void showSetNames(PrintWriter output, Object c) {
         mainVisitor.doAt(c, output);
         output.flush();
     }

     /**
      * Returns a list of items in the collection, with each separated by the separator.
      * Each item must not be null; its toString() is called for a printable representation
      * @param filename destination to which to write names
      * @param c source collection
      * @internal
      */
     public void showSetNames(String filename, Object c) throws IOException {
         PrintWriter pw = new PrintWriter(
             new OutputStreamWriter(
                 new FileOutputStream(filename),"utf-8"));
         showSetNames(log,c);
         pw.close();
     }

     public String getAbbreviatedName(
         String source,
         String pattern,
         String substitute) {

         int matchEnd = NameIterator.findMatchingEnd(source, pattern);
         int sdiv = source.length() - matchEnd;
         int pdiv = pattern.length() - matchEnd;
         StringBuffer result = new StringBuffer();
         addMatching(
             source.substring(0, sdiv),
             pattern.substring(0, pdiv),
             substitute,
             result);
         addMatching(
             source.substring(sdiv),
             pattern.substring(pdiv),
             substitute,
             result);
         return result.toString();
     }

     abstract public static class Relation {
         abstract public String getRelation(String a, String b);
     }

     static class NullRelation extends Relation {
         public String getRelation(String a, String b) { return ""; }
     }

     private Relation r = new NullRelation();

     public BagFormatter setRelation(Relation r) {
         this.r = r;
         return this; // for chaining
     }

     public Relation getRelation() {
         return r;
     }

     /*
      r.getRelati on(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
     */
     /*
     static final UnicodeSet NO_NAME =
         new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
     static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
     static final UnicodeSet NAME_CHARACTERS =
         new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");

     public UnicodeSet getSetForName(String namePattern) {
         UnicodeSet result = new UnicodeSet();
         Matcher m = Pattern.compile(namePattern).matcher("");
         // check for no-name items, and add in bulk
         m.reset("<no name>");
         if (m.matches()) {
             result.addAll(NO_NAME);
         }
         // check all others
         UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
         while (usi.next()) {
             String name = getName(usi.codepoint);
             if (name == null)
                 continue;
             m.reset(name);
             if (m.matches()) {
                 result.add(usi.codepoint);
             }
         }
         // Note: if Regex had some API so that if we could tell that
         // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
         // then we could optimize by skipping whole swathes of characters
         return result;
     }
     */

     public BagFormatter setMergeRanges(boolean in) {
         mergeRanges = in;
         return this;
     }
     public BagFormatter setShowSetAlso(boolean b) {
         showSetAlso = b;
         return this;
     }

     public String getName(int codePoint) {
         return getName("", codePoint, codePoint);
     }

     public String getName(String separator, int start, int end) {
         if (getNameSource() == null || getNameSource() == UnicodeProperty.NULL) return "";
         String result = getName(start, false);
         if (start == end) return separator + result;
         String endString = getName(end, false);
         if (result.length() == 0 && endString.length() == 0) return separator;
         if (abbreviated) endString = getAbbreviatedName(endString,result,"~");
         return separator + result + ".." + endString;
     }

     public String getName(String s) {
         return getName(s, false);
     }

     class NameLabel extends UnicodeLabel {
         UnicodeProperty nameProp;
         UnicodeSet control;
         UnicodeSet private_use;
         UnicodeSet noncharacter;
         UnicodeSet surrogate;

         NameLabel(UnicodeProperty.Factory source) {
             nameProp = source.getProperty("Name");
             control = source.getSet("gc=Cc");
             private_use = source.getSet("gc=Co");
             surrogate = source.getSet("gc=Cs");
             noncharacter = source.getSet("noncharactercodepoint=true");
         }

         public String getValue(int codePoint, boolean isShort) {
             String hcp = !isShort
                 ? "U+" + Utility.hex(codePoint, 4) + " "
                 : "";
             String result = nameProp.getValue(codePoint);
             if (result != null)
                 return hcp + result;
             if (control.contains(codePoint)) return "<control-" + Utility.hex(codePoint, 4) + ">";
             if (private_use.contains(codePoint)) return "<private-use-" + Utility.hex(codePoint, 4) + ">";
             if (noncharacter.contains(codePoint)) return "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
             if (surrogate.contains(codePoint)) return "<surrogate-" + Utility.hex(codePoint, 4) + ">";
             //if (suppressReserved) return "";
             return hcp + "<reserved-" + Utility.hex(codePoint, 4) + ">";
         }

     }

     // refactored
     public String getName(int codePoint, boolean withCodePoint) {
         return getNameSource().getValue(codePoint, !withCodePoint);
     }

     public String getName(String s, boolean withCodePoint) {
         return getNameSource().getValue(s, separator, !withCodePoint);
     }

     public String hex(String s) {
         return hex(s,separator);
     }

     public String hex(String s, String separator) {
         return UnicodeLabel.HEX.getValue(s, separator, true);
     }

     public String hex(int start, int end) {
         String s = Utility.hex(start,4);
         if (start == end) return s;
         return s + ".." + Utility.hex(end,4);
     }

     public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {
         this.source = source;
         return this;
     }

     public UnicodeProperty.Factory getUnicodePropertyFactory() {
         if (source == null) source = ICUPropertyFactory.make();
         return source;
     }

     public BagFormatter () {
     }

     public BagFormatter (UnicodeProperty.Factory source) {
         setUnicodePropertyFactory(source);
     }

     public String join(Object o) {
         return labelVisitor.join(o);
     }

     // ===== PRIVATES =====

     private Join labelVisitor = new Join();

     private boolean mergeRanges = true;
     private Transliterator showLiteral = null;
     private boolean showSetAlso = false;

     private RangeFinder rf = new RangeFinder();

     private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
     private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
     private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");

     private MyVisitor mainVisitor = new MyVisitor();

     /*
     private String getLabels(int start, int end) {
         Set names = new TreeSet();
         for (int cp = start; cp <= end; ++cp) {
             names.add(getLabel(cp));
         }
         return labelVisitor.join(names);
     }
     */

     private void addMatching(
         String source,
         String pattern,
         String substitute,
         StringBuffer result) {
         NameIterator n1 = new NameIterator(source);
         NameIterator n2 = new NameIterator(pattern);
         boolean first = true;
         while (true) {
             String s1 = n1.next();
             if (s1 == null)
                 break;
             String s2 = n2.next();
             if (!first)
                 result.append(" ");
             first = false;
             if (s1.equals(s2))
                 result.append(substitute);
             else
                 result.append(s1);
         }
     }

     private static NumberFormat nf =
         NumberFormat.getIntegerInstance(Locale.ENGLISH);
     static {
         nf.setGroupingUsed(false);
     }

     private class MyVisitor extends Visitor {
         private PrintWriter output;
         Tabber.MonoTabber myTabber;
         String commentSeparator;
         int counter;
         int valueSize;
         int labelSize;

         public void doAt(Object c, PrintWriter output) {
             this.output = output;
             counter = 0;
             myTabber = new Tabber.MonoTabber();
             myTabber.add(mergeRanges ? 14 : 6,Tabber.LEFT);

             if (propName.length() > 0) myTabber.add(propName.length() + 2,Tabber.LEFT);

             valueSize = getValueSource().getMaxWidth(shortValue);
             if (DEBUG) System.out.println("ValueSize: " + valueSize);
             if (valueSize > 0) myTabber.add(valueSize + 2,Tabber.LEFT); // value

             myTabber.add(3,Tabber.LEFT); // comment character

             labelSize = getLabelSource(true).getMaxWidth(shortLabel);
             if (labelSize > 0) myTabber.add(labelSize + 1,Tabber.LEFT); // value

             if (mergeRanges && showCount) myTabber.add(5,Tabber.RIGHT);

             if (showLiteral != null) myTabber.add(4,Tabber.LEFT);
             //myTabber.add(7,Tabber.LEFT);

             commentSeparator = (showCount || showLiteral != null
               || getLabelSource(true) != UnicodeProperty.NULL
               || getNameSource() != UnicodeProperty.NULL)
             ? "\t #" : "";

             if (DEBUG) System.out.println("Tabber: " + myTabber.toString());
             if (DEBUG) System.out.println("Tabber: " + myTabber.process("a\tb\td\td\tf\tg\th"));
             doAt(c);
         }

         public String format(Object o) {
             StringWriter sw = new StringWriter();
             PrintWriter pw = new PrintWriter(sw);
             doAt(o);
             pw.flush();
             String result = sw.getBuffer().toString();
             pw.close();
             return result;
         }

         protected void doBefore(Object container, Object o) {
             if (showSetAlso && container instanceof UnicodeSet) {
                 output.print("#" + container +  lineSeparator );
             }
         }

         protected void doBetween(Object container, Object lastItem, Object nextItem) {
         }

         protected void doAfter(Object container, Object o) {
             if (fullTotal != -1 && fullTotal != counter) {
                 output.print(lineSeparator);
                 output.print("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here." + lineSeparator);
                 output.print("# Total code points: " + nf.format(fullTotal) + lineSeparator);
                 fullTotal = -1;
             } else {
                 output.print(lineSeparator);
                 output.print("# Total code points: " + nf.format(counter) + lineSeparator);
             }
         }

         protected void doSimpleAt(Object o) {
             if (o instanceof Map.Entry) {
                 Map.Entry oo = (Map.Entry)o;
                 Object key = oo.getKey();
                 Object value = oo.getValue();
                 doBefore(o, key);
                 doAt(key);
                 output.print("->");
                 doAt(value);
                 doAfter(o, value);
                 counter++;
             } else if (o instanceof Visitor.CodePointRange) {
                 doAt((Visitor.CodePointRange) o);
             } else {
                 String thing = o.toString();
                 output.print(
                     myTabber.process(
                         hex(thing)
                             + commentSeparator
                             + insertLiteral(thing)
                             + "\t"
                             + getName(thing))
                     +  lineSeparator );
                 counter++;
             }
         }

         protected void doAt(Visitor.CodePointRange usi) {
             if (!mergeRanges) {
                 for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
                     showLine(cp, cp);
                 }
             } else {
                 rf.reset(usi.codepoint, usi.codepointEnd + 1);
                 while (rf.next()) {
                     showLine(rf.start, rf.limit - 1);
                 }
             }
         }

         private void showLine(int start, int end) {
             String label = getLabelSource(true).getValue(start, shortLabel);
             String value = getValue(start, shortValue);
             if (value == NULL_VALUE) return;

             counter += end - start + 1;
             String pn = propName;
             if (pn.length() != 0) {
                 pn = "\t; " + pn;
             }
             if (valueSize > 0) {
                 value = "\t; " + value;
             } else if (value.length() > 0) {
                 throw new IllegalArgumentException("maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));
             }
             if (labelSize > 0) {
                 label = "\t" + label;
             } else if (label.length() > 0) {
                 throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource(true).getMaxWidth(shortLabel));
             }

             String count = "";
             if (mergeRanges && showCount) {
                 if (end == start) count = "\t";
                 else count = "\t ["+ nf.format(end - start + 1)+ "]";
            }

             output.print(
                 myTabber.process(
                     hex(start, end)
                         + pn
                         + value
                         + commentSeparator
                         + label
                         + count
                         + insertLiteral(start, end)
                         + getName("\t ", start, end))
                 +  lineSeparator );
         }

         private String insertLiteral(String thing) {
             return (showLiteral == null ? ""
                 :  " \t(" + showLiteral.transliterate(thing) + ") ");
         }

         private String insertLiteral(int start, int end) {
             return (showLiteral == null ? "" :
                 " \t(" + showLiteral.transliterate(UTF16.valueOf(start))
                         + ((start != end)
                             ? (".." + showLiteral.transliterate(UTF16.valueOf(end)))
                             : "")
                 + ") ");
         }
         /*
         private String insertLiteral(int cp) {
             return (showLiteral == null ? ""
                 :  " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");
         }
         */
     }

     /**
      * Iterate through a string, breaking at words.
      * @author Davis
      */
     private static class NameIterator {
         String source;
         int position;
         int start;
         int limit;

         NameIterator(String source) {
             this.source = source;
             this.start = 0;
             this.limit = source.length();
         }
         /**
          * Find next word, including trailing spaces
          * @return the next word
          */
         String next() {
             if (position >= limit)
                 return null;
             int pos = source.indexOf(' ', position);
             if (pos < 0 || pos >= limit)
                 pos = limit;
             String result = source.substring(position, pos);
             position = pos + 1;
             return result;
         }

         static int findMatchingEnd(String s1, String s2) {
             int i = s1.length();
             int j = s2.length();
             try {
                 while (true) {
                     --i; // decrement both before calling function!
                     --j;
                     if (s1.charAt(i) != s2.charAt(j))
                         break;
                 }
             } catch (Exception e) {} // run off start

             ++i; // counteract increment
             i = s1.indexOf(' ', i); // move forward to space
             if (i < 0)
                 return 0;
             return s1.length() - i;
         }
     }

     private class RangeFinder {
         int start, limit;
         private int veryLimit;
         //String label, value;
         void reset(int start, int limit) {
             this.limit = start;
             this.veryLimit = limit;
         }
         boolean next() {
             if (limit >= veryLimit)
                 return false;
             start = limit; // set to end of last
             String label = getLabelSource(false).getValue(limit, true);
             String value = getValue(limit, true);
             String breaker = getRangeBreakSource().getValue(limit,true);
             if (DEBUG && limit < 0x7F) System.out.println("Label: " + label + ", Value: " + value + ", Break: " + breaker);
             limit++;
             for (; limit < veryLimit; limit++) {
                 String s = getLabelSource(false).getValue(limit, true);
                 String v = getValue(limit, true);
                 String b = getRangeBreakSource().getValue(limit, true);
                 if (DEBUG && limit < 0x7F) System.out.println("*Label: " + label + ", Value: " + value + ", Break: " + breaker);
                 if (!equalTo(s, label) || !equalTo(v, value) || !equalTo(b, breaker)) break;
             }
             // at this point, limit is the first item that has a different label than source
             // OR, we got to the end, and limit == veryLimit
             return true;
         }
     }

     boolean equalTo(Object a, Object b) {
         if (a == b) return true;
         if (a == null) return false;
         return a.equals(b);
     }

     boolean shortLabel = true;
     boolean shortValue = true;

     public String getPrefix() {
         return prefix;
     }

     public String getSuffix() {
         return suffix;
     }

     public BagFormatter setPrefix(String string) {
         prefix = string;
         return this;
     }

     public BagFormatter setSuffix(String string) {
         suffix = string;
         return this;
     }

     public boolean isAbbreviated() {
         return abbreviated;
     }

     public BagFormatter setAbbreviated(boolean b) {
         abbreviated = b;
         return this;
     }

     public UnicodeLabel getLabelSource(boolean visible) {
         if (labelSource == null) {
             Map labelMap = new HashMap();
             //labelMap.put("Lo","L&");
             labelMap.put("Lu","L&");
             labelMap.put("Lt","L&");
             labelMap.put("Ll","L&");
             labelSource = new UnicodeProperty.FilteredProperty(
                 getUnicodePropertyFactory().getProperty("General_Category"),
                 new UnicodeProperty.MapFilter(labelMap)
             ).setAllowValueAliasCollisions(true);
         }
         return labelSource;
     }

     /**
      * @deprecated
      */
     public static void addAll(UnicodeSet source, Collection target) {
         source.addAllTo(target);
     }

     // UTILITIES

     public static final Transliterator hex = Transliterator.getInstance(
         "[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");

     public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {
         return openReader(dir,filename,"UTF-8");
     }

     public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
         File file = new File(dir + filename);
         if (SHOW_FILES && log != null) {
             log.println("Opening File: "
                 + file.getCanonicalPath());
         }
         return new BufferedReader(
             new InputStreamReader(
                 new FileInputStream(file),
                 encoding),
             4*1024);
     }

     public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException {
         return openWriter(dir,filename,"UTF-8");
     }

     public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException {
         File file = new File(dir + filename);
         if (SHOW_FILES && log != null) {
             log.println("Creating File: "
                 + file.getCanonicalPath());
         }
         String parentName = file.getParent();
         if (parentName != null) {
             File parent = new File(parentName);
             parent.mkdirs();
         }
         return new PrintWriter(
             new BufferedWriter(
                 new OutputStreamWriter(
                     new FileOutputStream(file),
                     encoding),
                 4*1024));
     }
     public static PrintWriter getLog() {
         return log;
     }
     public BagFormatter setLog(PrintWriter writer) {
         log = writer;
         return this;
     }
     public String getSeparator() {
         return separator;
     }
     public BagFormatter setSeparator(String string) {
         separator = string;
         return this;
     }
     public Transliterator getShowLiteral() {
         return showLiteral;
     }
     public BagFormatter setShowLiteral(Transliterator transliterator) {
         showLiteral = transliterator;
         return this;
     }

     // ===== CONVENIENCES =====
     private class Join extends Visitor {
         StringBuffer output = new StringBuffer();
         int depth = 0;
         String join (Object o) {
             output.setLength(0);
             doAt(o);
             return output.toString();
         }
         protected void doBefore(Object container, Object item) {
             ++depth;
             output.append(prefix);
         }
         protected void doAfter(Object container, Object item) {
             output.append(suffix);
             --depth;
         }
         protected void doBetween(Object container, Object lastItem, Object nextItem) {
             output.append(separator);
         }
         protected void doSimpleAt(Object o) {
             if (o != null) output.append(o.toString());
         }
     }

     /**
      * @param label
      */
     public BagFormatter setLabelSource(UnicodeLabel label) {
         if (label == null) label = UnicodeLabel.NULL;
         labelSource = label;
         return this;
     }

     /**
      * @return the NameLable representing the source
      */
     public UnicodeLabel getNameSource() {
         if (nameSource == null) {
             nameSource = new NameLabel(getUnicodePropertyFactory());
         }
         return nameSource;
     }

     /**
      * @param label
      */
     public BagFormatter setNameSource(UnicodeLabel label) {
         if (label == null) label = UnicodeLabel.NULL;
         nameSource = label;
         return this;
     }

     /**
      * @return the UnicodeLabel representing the value
      */
     public UnicodeLabel getValueSource() {
         if (valueSource == null) valueSource = UnicodeLabel.NULL;
         return valueSource;
     }

     private String getValue(int cp, boolean shortValue) {
         String result = getValueSource().getValue(cp, shortValue);
         if (result == null) return NULL_VALUE;
         if (hexValue) result = hex(result, " ");
         return result;
     }

     /**
      * @param label
      */
     public BagFormatter setValueSource(UnicodeLabel label) {
         if (label == null) label = UnicodeLabel.NULL;
         valueSource = label;
         return this;
     }

     public BagFormatter setValueSource(String label) {
         return setValueSource(new UnicodeLabel.Constant(label));
     }

     /**
      * @return true if showCount is true
      */
     public boolean isShowCount() {
         return showCount;
     }

     /**
      * @param b true to show the count
      * @return this (for chaining)
      */
     public BagFormatter setShowCount(boolean b) {
         showCount = b;
         return this;
     }

     /**
      * @return the property name
      */
     public String getPropName() {
         return propName;
     }

     /**
      * @param string
      * @return this (for chaining)
      */
     public BagFormatter setPropName(String string) {
         if (string == null) string = "";
         propName = string;
         return this;
     }

     /**
      * @return true if this is a hexValue
      */
     public boolean isHexValue() {
         return hexValue;
     }

     /**
      * @param b
      * @return this (for chaining)
      */
     public BagFormatter setHexValue(boolean b) {
         hexValue = b;
         return this;
     }

     /**
      * @return the full total
      */
     public int getFullTotal() {
         return fullTotal;
     }

     /**
      * @param i set the full total
      * @return this (for chaining)
      */
     public BagFormatter setFullTotal(int i) {
         fullTotal = i;
         return this;
     }

     /**
      * @return the line separator
      */
     public String getLineSeparator() {
         return lineSeparator;
     }

     /**
      * @param string
      * @return this (for chaining)
      */
     public BagFormatter setLineSeparator(String string) {
         lineSeparator = string;
         return this;
     }

     /**
      * @return the UnicodeLabel representing the range break source
      */
     public UnicodeLabel getRangeBreakSource() {
         if (rangeBreakSource == null) {
             Map labelMap = new HashMap();
             // reflects the code point types on p 25
             labelMap.put("Lo", "G&");
             labelMap.put("Lm", "G&");
             labelMap.put("Lu", "G&");
             labelMap.put("Lt", "G&");
             labelMap.put("Ll", "G&");
             labelMap.put("Mn", "G&");
             labelMap.put("Me", "G&");
             labelMap.put("Mc", "G&");
             labelMap.put("Nd", "G&");
             labelMap.put("Nl", "G&");
             labelMap.put("No", "G&");
             labelMap.put("Zs", "G&");
             labelMap.put("Pd", "G&");
             labelMap.put("Ps", "G&");
             labelMap.put("Pe", "G&");
             labelMap.put("Pc", "G&");
             labelMap.put("Po", "G&");
             labelMap.put("Pi", "G&");
             labelMap.put("Pf", "G&");
             labelMap.put("Sm", "G&");
             labelMap.put("Sc", "G&");
             labelMap.put("Sk", "G&");
             labelMap.put("So", "G&");

             labelMap.put("Zl", "Cf");
             labelMap.put("Zp", "Cf");

             rangeBreakSource =
                 new UnicodeProperty
                     .FilteredProperty(
                         getUnicodePropertyFactory().getProperty(
                             "General_Category"),
                         new UnicodeProperty.MapFilter(labelMap))
                     .setAllowValueAliasCollisions(true);

             /*
             "Cn", // = Other, Not Assigned 0
             "Cc", // = Other, Control 15
             "Cf", // = Other, Format 16
             UnicodeProperty.UNUSED, // missing
             "Co", // = Other, Private Use 18
             "Cs", // = Other, Surrogate 19
             */
         }
         return rangeBreakSource;
     }

     /**
      * @param label
      */
     public BagFormatter setRangeBreakSource(UnicodeLabel label) {
         if (label == null) label = UnicodeLabel.NULL;
         rangeBreakSource = label;
         return this;
     }

 }