src/com/ibm/icu/dev/tool/cldr/GenerateCldrTests.java - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 * Copyright (c) 2002-2004, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Mark Davis
 **********************************************************************
 */
 package com.ibm.icu.dev.tool.cldr;

 import java.io.File;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.io.Writer;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;

 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;

 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.DateFormat;
 import com.ibm.icu.text.Normalizer;
 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.text.SimpleDateFormat;
 import com.ibm.icu.text.Transliterator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.icu.util.Currency;
 import com.ibm.icu.util.LocaleData;
 import com.ibm.icu.util.TimeZone;
 import com.ibm.icu.util.ULocale;

 import com.ibm.icu.dev.test.util.Relation;
 import com.ibm.icu.dev.test.util.SortedBag;
 import com.ibm.icu.dev.tool.UOption;
 import com.ibm.icu.dev.tool.cldr.ICUResourceWriter.Resource;
 import com.ibm.icu.dev.tool.cldr.ICUResourceWriter.ResourceTable;

 /**
  * Generated tests for CLDR. Currently, these are driven off of a version of ICU4J, and just
  * use the data from that.
  * TODO Get the data directly from the CLDR tree.
  * @author medavis
  */

 public class GenerateCldrTests {

     static private PrintWriter log;
     PrintWriter out;
     private static final int
         HELP1 = 0,
         HELP2 = 1,
         DESTDIR = 2,
         LOGDIR = 3,
         SOURCEDIR =4,
         MATCH = 5,
         FULLY_RESOLVED = 6;

     private static final UOption[] options = {
             UOption.HELP_H(),
             UOption.HELP_QUESTION_MARK(),
             UOption.DESTDIR().setDefault("C:\\DATA\\GEN\\cldr\\test\\"),
             UOption.create("log", 'l', UOption.REQUIRES_ARG).setDefault("C:\\DATA\\GEN\\cldr\\test\\"),
             UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\"),
             UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
             UOption.create("fullyresolved", 'f', UOption.NO_ARG),
     };

     CldrCollations cldrCollations;
     static String logDir = null, destDir = null;

 	public static void main(String[] args) throws Exception {
         UOption.parseArgs(args, options);
         log = BagFormatter.openUTF8Writer(options[LOGDIR].value, "log.txt");
         try {

 			//compareAvailable();
             //if (true) return;
 			//System.out.println(createCaseClosure(new UnicodeSet("[a{bc}{def}{oss}]")));
 			//System.out.println(createCaseClosure(new UnicodeSet("[a-zß{aa}]")));
             GenerateCldrTests t = new GenerateCldrTests();
 			//t.generate(new ULocale("hu"), null);
 			t.generate(options[MATCH].value);
 			/*
 			t.generate(new ULocale("da"));
 			t.generate(new ULocale("hu"));
 			t.generate(new ULocale("de"));
 			t.generate(new ULocale("ar@collation=direct"));
 			*/
 		} finally {
             log.close();
             System.out.println("Done");
 		}
     }

     /**
 	 *
 	 */
     /*
 	private static void compareAvailable() {
 		ULocale[] cols = Collator.getAvailableULocales();
         Locale[] alocs = NumberFormat.getAvailableLocales();
 		Set sCols = filter(cols);
         Set sLocs = filter(alocs);
         Set oldSLocs = new TreeSet(sCols);
         sLocs.removeAll(sCols);
         log.println("main - collation");
         showLocales(sLocs);
         sCols.removeAll(oldSLocs);
         log.println();
         log.println("collation - main");
         showLocales(sCols);
 	}
     */

 	/**
 	 * @param sLocs
 	 */
 	private static void showLocales(Set sLocs) {
 		for (Iterator it = sLocs.iterator(); it.hasNext();) {
             String s = (String) it.next();
         	log.println(s + "\t" + ULocale.getDisplayLanguage(s,"en"));
         }
 	}

 	/**
 	 * @param cols
 	 * @return
 	 */
 	private static Set filter(Object[] cols) {
 		Set result = new TreeSet();
         for (int i = 0; i < cols.length; ++i) {
         	String s = cols[i].toString();
             if (s.indexOf('_') >= 0) continue;
             result.add(s);
         }
         return result;
 	}

     Set addULocales(Object[] objects, Set target) {
     	for (int i = 0; i < objects.length; ++i) {
     		target.add(new ULocale(objects[i].toString()));
         }
         return target;
     }
     private void addLocale(ULocale item) {
         String lang = item.getLanguage();
         if (lang.length() == 0) return; // skip root
         ULocale parent = new ULocale(lang);
         //System.out.println(item + ", " + parent);
         parentToLocales.add(parent, item);
         /*
         RuleBasedCollator col = cldrCollations.getInstance(item);
         if (col == null) {
         	System.out.println("No collator for: " + item);
         }
         String rules = col.getRules(); // ((RuleBasedCollator)Collator.getInstance(item)).getRules();
         rulesToLocales.add(rules, item);
         localesToRules.put(item, rules);
         */
     }

     Set collationLocales; //  = addULocales(Collator.getAvailableULocales(), new TreeSet(ULocaleComparator));
     Set numberLocales = addULocales(NumberFormat.getAvailableLocales(), new TreeSet(ULocaleComparator));
     Set dateLocales = addULocales(DateFormat.getAvailableLocales(), new TreeSet(ULocaleComparator));
     Set allLocales = new TreeSet(ULocaleComparator);

     Map localesToRules = new HashMap();
     Relation.CollectionFactory cm = new Relation.CollectionMaker(ULocaleComparator);
     Relation rulesToLocales = new Relation(new TreeMap(), cm);
     Relation parentToLocales = new Relation(new TreeMap(ULocaleComparator), cm);

     void getLocaleList() {
         collationLocales = new TreeSet(ULocaleComparator);
         collationLocales.addAll(cldrCollations.getAvailableSet());
         /*
     	collationLocales = addULocales(new String[] { // HACK
                 "ga",
                 "nl",
                 "pt",
                 "de@collation=phonebook",
                 "es@collation=traditional",
                 "hi@collation=direct",
                 "zh@collation=pinyin",
                 "zh@collation=stroke",
                 "zh@collation=traditional",
         }, collationLocales);
         */
         allLocales.addAll(collationLocales);
         allLocales.addAll(numberLocales);
         allLocales.addAll(dateLocales);
         // HACK
         // get all collations with same rules

         for (Iterator it = allLocales.iterator(); it.hasNext();) {
 			addLocale((ULocale) it.next());
         }
         /*
         String[] others = new String[] {
                 "de@collation=phonebook",
                 "es@collation=traditional",
                 "hi@collation=direct",
                 "zh@collation=pinyin",
                 "zh@collation=stroke",
                 "zh@collation=traditional",
         };
         for (int i = 0; i < others.length; ++i) {
             addLocale(new ULocale(others[i]));
         }
         */
     }

     CldrOthers cldrOthers;

 	void generate(String pat) throws Exception {
         cldrOthers = new CldrOthers(options[SOURCEDIR].value + "main" + File.separator, pat);
         cldrOthers.show();
         //if (true) return;
         cldrCollations = new CldrCollations(options[SOURCEDIR].value + "collation" + File.separator, pat);
         cldrCollations.show();
         getLocaleList();

         Matcher m = Pattern.compile(pat).matcher("");
         for (Iterator it = parentToLocales.keySet().iterator(); it.hasNext();) {
             String p = it.next().toString();
             if (!m.reset(p).matches()) continue;
         	generate(new ULocale(p));
         }
     }

     private void generate(ULocale locale) throws Exception {
         out = BagFormatter.openUTF8Writer(options[DESTDIR].value, locale + ".xml");
         out.println("<?xml version='1.0' encoding='UTF-8' ?>");
         out.println("<!DOCTYPE ldml SYSTEM 'http://www.unicode.org/cldr/dtd/1.2/beta/cldrTest.dtd'>");
         out.println("<!-- For information, see readme.html -->");
         out.println(" <cldrTest version='1.2' base='" + locale + "'>");
         out.println(" <!-- " + BagFormatter.toXML.transliterate(
                 locale.getDisplayName(ULocale.ENGLISH) + " ["
                 + locale.getDisplayName(locale))
                 + "] -->");
         //generateItems(locale, numberLocales, NumberEquator, NumberShower);
         //generateItems(locale, dateLocales, DateEquator, DateShower);
         generateItems(locale, collationLocales, CollationEquator, CollationShower);
         out.println(" </cldrTest>");
         out.close();
         GenerateSidewaysView.generateBat(options[SOURCEDIR].value + "test" + File.separator, locale + ".xml", options[DESTDIR].value, locale + ".xml");
     }

     /*
      *
         // first pass through and get all the functional equivalents
         Map uniqueLocales = new TreeMap();

         String[] keywords = Collator.getKeywords();
         boolean [] isAvailable = new boolean[1];
         for (int i = 0; i < locales.length; ++i) {
             add(locales[i], uniqueLocales);
             if (true) continue; // TODO restore once Vladimir fixes
             for (int j = 0; j < keywords.length; ++j) {
                 String[] values = Collator.getKeywordValues(keywords[j]);
                 for (int k = 0; k < values.length; ++k) {
                     // TODO -- for a full job, would do all combinations of different keywords!
                     if (values[k].equals("standard")) continue;
                     add(new ULocale(locales[i] + "@" + keywords[j] + "=" + values[k]), uniqueLocales);
                     //ULocale other = Collator.getFunctionalEquivalent(keywords[j], locales[i], isAvailable);
                 }
             }
         }
         for (int i = 0; i < extras.length; ++i) {
             add(new ULocale(extras[i]), uniqueLocales);
         }
         // items are now sorted by rules. So resort by locale
         Map toDo = new TreeMap(ULocaleComparator);
         for (Iterator it = uniqueLocales.keySet().iterator(); it.hasNext();) {
             Object rules = it.next();
             Set s = (Set) uniqueLocales.get(rules);
             ULocale ulocale = (ULocale) s.iterator().next(); // get first one
             toDo.put(ulocale, s);
         }
         for (Iterator it = toDo.keySet().iterator(); it.hasNext();) {
             ULocale ulocale = (ULocale) it.next();
             Set s = (Set) toDo.get(ulocale);
             generate(ulocale);
         }
      */

     /**
      * add locale into list. Replace old if shorter
      * @param locale
      */
     void add(ULocale locale, Map uniqueLocales) {
         try {
 			RuleBasedCollator col = cldrCollations.getInstance(locale); // (RuleBasedCollator) Collator.getInstance(locale);
             // for our purposes, separate locales if we are using different exemplars
 			String key = col.getRules() + "\uFFFF" + getExemplarSet(locale, 0);
 			Set s = (Set) uniqueLocales.get(key);
 			if (s == null) {
 				s = new TreeSet(ULocaleComparator);
 			    uniqueLocales.put(key, s);
 			}
             System.out.println("Adding " + locale);
 			s.add(locale);
 		} catch (Throwable e) { // skip
             System.out.println("skipped " + locale);
 		}
     }

     /**
      * Work-around
      * @param locale
      * @param options
      * @return
      */
     public UnicodeSet getExemplarSet(ULocale locale, int options) {
         String n = locale.toString();
         int pos = n.indexOf('@');
         if (pos >= 0) locale = new ULocale(n.substring(0,pos));
         UnicodeSet result = cldrOthers.getExemplarSet(locale); // LocaleData.getExemplarSet(locale, options);
         if (options == 0) result.closeOver(UnicodeSet.CASE);
         return result;
     }

     public static final Comparator ULocaleComparator = new Comparator() {
 		public int compare(Object o1, Object o2) {
 			return o1.toString().compareTo(o2.toString());
 		}
     };

     public interface Equator {
         public boolean equals(Object o1, Object o2);
     }

     static boolean intersects(Collection a, Collection b) {
         for (Iterator it = a.iterator(); it.hasNext();) {
             if (b.contains(it.next())) return true;
         }
         return false;
     }

     static Collection extract(Object x, Collection a, Equator e, Collection output) {
         List itemsToRemove = new ArrayList();
         for (Iterator it = a.iterator(); it.hasNext();) {
             Object item = it.next();
             if (e.equals(x, item)) {
                 itemsToRemove.add(item); // have to do this because iterator may not allow
                 output.add(item);
             }
         }
         a.removeAll(itemsToRemove);
         return output;
     }

     class ResultsPrinter {
         Map settings = new TreeMap();
         Map oldSettings = new TreeMap();
         void set(String name, String value) {
             settings.put(name, value);
         }
         void print(String result) {
             out.print("   <result");
             for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
                 Object key = it.next();
                 Object value = settings.get(key);
                 if (!value.equals(oldSettings.get(key))) {
                     out.print(" " + key + "='" + BagFormatter.toXML.transliterate(value.toString()) + "'");
                     oldSettings.put(key, value);
                 }
             }
             out.println(">" + BagFormatter.toXML.transliterate(result) + "</result>");
         }
     }

     interface DataShower {
         void show(ULocale first, Collection others) throws Exception;
     }

     private void generateItems(ULocale locale, Collection onlyLocales, Equator test, DataShower generator) throws Exception {
         Collection sublocales = parentToLocales.get(locale, new ArrayList());
         sublocales.retainAll(onlyLocales);
         // get all the things that share the same behavior
         while (sublocales.size() != 0) {
             // start with the first one
             ULocale first = (ULocale) sublocales.iterator().next();
             Collection others = extract(first, sublocales, test, new ArrayList());
             generator.show(first, others);
         }
     }

     private void showLocales(String elementName, Collection others) {
         //System.out.println(elementName + ": " + locale);
         out.println("  <" + elementName + " ");
         StringBuffer comment = new StringBuffer();
         if (others != null && others.size() != 0) {
             out.print("locales='");
             boolean first = true;
             for (Iterator it = others.iterator(); it.hasNext();) {
                 if (first)
                     first = false;
                 else {
                     out.print(" ");
                     comment.append("; ");
                 }
                 ULocale loc = (ULocale) it.next();
                 out.print(loc);
                 comment.append(loc.getDisplayName(ULocale.ENGLISH) + " ["
                         + loc.getDisplayName(loc) + "]");
             }
             out.print("'");
         }
         out.println(">");
         out.println("<!-- "
                         + BagFormatter.toXML.transliterate(comment.toString())
                         + " -->");
     }

     // ========== DATES ==========

     static TimeZone utc = TimeZone.getTimeZone("GMT");
     static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
     {
     	iso.setTimeZone(utc);
     }
     static int[] DateFormatValues = {-1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL};
     static String[] DateFormatNames = {"none", "short", "medium", "long", "full"};
     private static DateFormat getDateFormat(ULocale locale, int i, int k) {
         Locale olocale = locale.toLocale(); // TODO replace once fixed!!
         DateFormat df;
         assert(olocale == null);
         if (DateFormatValues[k] == -1) df = DateFormat.getDateInstance(DateFormatValues[i], olocale);
         else if (DateFormatValues[i] == -1) df = DateFormat.getTimeInstance(DateFormatValues[k], olocale);
         else df = DateFormat.getDateTimeInstance(DateFormatValues[i], DateFormatValues[k], olocale);
         return df;
     }

     static Equator DateEquator = new Equator() {
         /**
          * Must both be ULocales
          */
 		public boolean equals(Object o1, Object o2) {
 			ULocale loc1 = (ULocale) o1;
             ULocale loc2 = (ULocale) o2;
             for (int i = 0; i < DateFormatValues.length; ++i) {
                 for (int j = 0; j < DateFormatValues.length; ++j) {
                     if (i == 0 && j == 0) continue; // skip null case
                     DateFormat df1 = getDateFormat(loc1, i, j);
                     NumberFormat nf = df1.getNumberFormat();
                     nf.setCurrency(NO_CURRENCY);
                     df1.setNumberFormat(nf);
                     DateFormat df2 = getDateFormat(loc2, i, j);
                     nf = df2.getNumberFormat();
                     nf.setCurrency(NO_CURRENCY);
                     df2.setNumberFormat(nf);
                     if (!df1.equals(df2)) {
                         df1.equals(df2);
                         return false;
                     }
                 }
             }
             return true;
 		}
     };

     DataShower DateShower = new DataShower() {
      public void show(ULocale locale, Collection others) throws ParseException {
         showLocales("date", others);

         String[] samples = {
                 "1900-01-31T00:00:00Z",
                 "1909-02-28T00:00:01Z",
                 "1918-03-26T00:59:59Z",
                 "1932-04-24T01:00:00Z",
                 "1945-05-20T01:00:01Z",
                 "1952-06-18T11:59:59Z",
                 "1973-07-16T12:00:00Z",
                 "1999-08-14T12:00:01Z",
                 "2000-09-12T22:59:59Z",
                 "2001-10-08T23:00:00Z",
                 "2004-11-04T23:00:01Z",
                 "2010-12-01T23:59:59Z",
         };

         ResultsPrinter rp = new ResultsPrinter();
         for (int j = 0; j < samples.length; ++j) {
             Date datetime = iso.parse(samples[j]);
             rp.set("input", iso.format(datetime));
             for (int i = 0; i < DateFormatValues.length; ++i) {
                 rp.set("dateType", DateFormatNames[i]);
                 for (int k = 0; k < DateFormatValues.length; ++k) {
                     if (DateFormatValues[i] == -1 && DateFormatValues[k] == -1) continue;
                     rp.set("timeType", DateFormatNames[k]);
                 	DateFormat df = getDateFormat(locale, i, k);
                     df.setTimeZone(utc);
                 	rp.print(df.format(datetime));
                 }
             }
         }
         out.println("  </date>");
     }};

     // ========== NUMBERS ==========

 	static String[] NumberNames = {"standard", "integer", "decimal", "percent", "scientific"};
     private static NumberFormat getNumberFormat(ULocale ulocale, int i) {
         Locale olocale = ulocale.toLocale();
         NumberFormat nf = null;
         switch(i) {
         case 0: nf = NumberFormat.getInstance(olocale); break;
         case 1: nf = NumberFormat.getIntegerInstance(olocale); break;
         case 2: nf = NumberFormat.getNumberInstance(olocale); break;
         case 3: nf = NumberFormat.getPercentInstance(olocale); break;
         case 4: nf = NumberFormat.getScientificInstance(olocale); break;
         default: throw new IllegalArgumentException("Unknown NumberFormat: " + i);
         }
         return nf;
     }

     static Currency NO_CURRENCY = Currency.getInstance("XXX");

     static Equator NumberEquator = new Equator() {
         /**
          * Must both be ULocales
          */
         public boolean equals(Object o1, Object o2) {
             ULocale loc1 = (ULocale) o1;
             ULocale loc2 = (ULocale) o2;
             for (int i = 0; i < NumberNames.length; ++i) {
                 NumberFormat nf1 = getNumberFormat(loc1, i);
                 nf1.setCurrency(NO_CURRENCY);
                 NumberFormat nf2 = getNumberFormat(loc2, i);
                 nf2.setCurrency(NO_CURRENCY);
             	if (!nf1.equals(nf2)) {
                     //nf1.equals(nf2);
                     return false;
                 }
             }
             return true;
         }
     };

     DataShower NumberShower = new DataShower() {
         public void show(ULocale locale, Collection others) throws ParseException {
         showLocales("number", others);

         double[] samples = {
                 0,
                 0.01, -0.01,
                 1, -1,
                 123.456, -123.456,
                 123456.78, -123456.78,
                 Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY,
                 Double.NaN
         };
         ResultsPrinter rp = new ResultsPrinter();
         for (int j = 0; j < samples.length; ++j) {
             double sample = samples[j];
             rp.set("input", String.valueOf(sample));
             for (int i = 0; i < NumberNames.length; ++i) {
                 rp.set("numberType", NumberNames[i]);
                 NumberFormat nf = getNumberFormat(locale, i);
                 rp.print(nf.format(sample));
             }
         }
         out.println("  </number>");
     }};


     // ========== COLLATION ==========

     Equator CollationEquator = new Equator() {
         /**
          * Must both be ULocales
          */
         public boolean equals(Object o1, Object o2) {
             ULocale loc1 = (ULocale) o1;
             ULocale loc2 = (ULocale) o2;
             return cldrCollations.getInstance(loc1).equals(cldrCollations.getInstance(loc2)); // Collator.getInstance(loc1).equals(Collator.getInstance(loc2));
         }
     };

     DataShower CollationShower = new DataShower() {
     	public void show(ULocale locale, Collection others) {
         showLocales("collation", others);

 		Collator col = cldrCollations.getInstance(locale); // Collator.getInstance(locale);

 		UnicodeSet tailored = col.getTailoredSet();
         if (locale.getLanguage().equals("zh")) {
             tailored.addAll(new UnicodeSet("[[a-z]-[v]]"));
             log.println("HACK for Pinyin");
         }
         tailored = createCaseClosure(tailored);
         tailored = nfc(tailored);
         //System.out.println(tailored.toPattern(true));

 		UnicodeSet exemplars = getExemplarSet(locale, UnicodeSet.CASE);
         // add all the exemplars
         if (false) for (Iterator it = others.iterator(); it.hasNext(); ) {
             exemplars.addAll(getExemplarSet((ULocale)it.next(), UnicodeSet.CASE));
         }

         exemplars = createCaseClosure(exemplars);
         exemplars = nfc(exemplars);
         //System.out.println(exemplars.toPattern(true));
 		tailored.addAll(exemplars);
         //UnicodeSet tailoredMinusHan = new UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET);
         if (!exemplars.containsAll(tailored)) {
         	//BagFormatter bf = new BagFormatter();
             log.println("In Tailored, but not Exemplar; Locale: " + locale + "\t" + locale.getDisplayName());
             log.println(new UnicodeSet(tailored).removeAll(exemplars).toPattern(false));
             //bf.(log,"tailored", tailored, "exemplars", exemplars);
             log.flush();
         }
         tailored.addAll(new UnicodeSet("[\\ .02{12}]"));
         tailored.removeAll(SKIP_COLLATION_SET);

         SortedBag bag = new SortedBag(col);
 		doCollationResult(col, tailored, bag);
 		out.println("  </collation>");
 	}};
     static final UnicodeSet SKIP_COLLATION_SET = new UnicodeSet(
             "[[:script=han:][:script=hangul:]-[\u4e00-\u4eff \u9f00-\u9fff \uac00-\uacff \ud700-\ud7ff]]");

 	/**
 	 * @param col
 	 * @param tailored
 	 * @param bag
 	 */
 	private void doCollationResult(Collator col, UnicodeSet tailored, SortedBag bag) {
 		for (UnicodeSetIterator usi = new UnicodeSetIterator(tailored); usi.next(); ) {
 		    String s = usi.getString();
 		    bag.add('x' + s);
 		    bag.add('X' + s);
 		    bag.add('x' + s + 'x');
 		}
 		//out.println("   <set locale='" + locale + "'/>");
         /*
         if (others != null) for (Iterator it = others.iterator(); it.hasNext(); ) {
             ULocale uloc = (ULocale) it.next();
             if (uloc.equals(locale)) continue;
         	out.println("   <other locale='" + uloc + "'/>");
         }
         */
 		out.println("   <result>");
 		String last = "";
 		boolean needEquals = false;
 		for (Iterator it = bag.iterator(); it.hasNext(); ) {
 		    String s = (String) it.next();
 		    if (col.compare(s, last) != 0) {
 		        if (needEquals) out.println(last);
                 needEquals = false;
                 last = s;
 		    } else {
 		    	needEquals = true;
 		    }
 		    out.println(BagFormatter.toXML.transliterate(s));

 		}
 		out.println("   </result>");
 	}

     static public Set getMatchingXMLFiles(String sourceDir, String localeRegex) {
         Matcher m = Pattern.compile(localeRegex).matcher("");
         Set s = new TreeSet();
         File[] files = new File(sourceDir).listFiles();
         for (int i = 0; i < files.length; ++i) {
             String name = files[i].getName();
             if (!name.endsWith(".xml")) continue;
             if (name.startsWith("supplementalData")) continue;
             String locale = name.substring(0,name.length()-4); // drop .xml
             if (!locale.equals("root") && !m.reset(locale).matches()) continue;
             s.add(locale);
         }
         return s;
     }

     static class CldrOthers {
         Map ulocale_exemplars = new TreeMap(ULocaleComparator);
         Map uniqueExemplars = new HashMap();
         String sourceDir;
         Set locales = new TreeSet(ULocaleComparator);

         UnicodeSet getExemplarSet(ULocale locale) {
         	return (UnicodeSet) ulocale_exemplars.get(locale);
         }

         void show() {
             log.println("Showing Locales");
             log.println("Unique Exemplars: " + uniqueExemplars.size());
             for (Iterator it2 = ulocale_exemplars.keySet().iterator(); it2.hasNext();) {
                 ULocale locale = (ULocale) it2.next();
                 UnicodeSet us = getExemplarSet(locale);
                 log.println("\t" + locale + ", " + us);
             }
         }
         static final ULocale ROOT = new ULocale("root"); // since CLDR has different root.

         CldrOthers(String sourceDir, String localeRegex) {
             this.sourceDir = sourceDir;
             Set s = getMatchingXMLFiles(sourceDir, localeRegex);
             for (Iterator it = s.iterator(); it.hasNext();) {
                 getInfo((String) it.next());
             }
             // now do inheritance manually
             for (Iterator it = locales.iterator(); it.hasNext();) {
                 ULocale locale = (ULocale) it.next();
             	UnicodeSet ex = (UnicodeSet) ulocale_exemplars.get(locale);
                 if (ex != null) continue;
                 for (ULocale parent = locale.getFallback(); parent != null; parent = parent.getFallback()) {
                     ULocale fixedParent = parent.getLanguage().length() == 0 ? ROOT : parent;
                 	ex = (UnicodeSet) ulocale_exemplars.get(fixedParent);
                     if (ex == null) continue;
                     ulocale_exemplars.put(locale, ex);
                     break;
                 }
             }

         }
         void getInfo(String locale) {
             System.out.println("Getting info for: " + locale);
             locales.add(new ULocale(locale));
             Document doc;
             if (options[FULLY_RESOLVED].doesOccur) {
 				doc = LDMLUtilities.getFullyResolvedLDML(sourceDir, locale,
 						false, false, false);
 			} else {
 				doc = LDMLUtilities.parse(sourceDir + locale + ".xml", false);
 			}
             Node node = LDMLUtilities.getNode(doc, "//ldml/characters/exemplarCharacters");
             if (node == null) return;
             if (isDraft(node)) System.out.println("Skipping draft: " + locale + ", " + getXPath(node));
         	String exemplars = LDMLUtilities.getNodeValue(node);
             UnicodeSet exemplarSet = new UnicodeSet(exemplars);
             UnicodeSet fixed = (UnicodeSet) uniqueExemplars.get(exemplarSet);
             if (fixed == null) {
             	uniqueExemplars.put(exemplarSet, exemplarSet);
                 fixed = exemplarSet;
             }
             ulocale_exemplars.put(new ULocale(locale), fixed);
         }
     }

     public static boolean isDraft(Node node) {
         for (; node.getNodeType() != Node.DOCUMENT_NODE; node = node.getParentNode()){
             NamedNodeMap attributes = node.getAttributes();
             if (attributes == null) continue;
             for (int i = 0; i < attributes.getLength(); ++i) {
                 Node attribute = attributes.item(i);
                 if (attribute.getNodeName().equals("draft") && attribute.getNodeValue().equals("true")) return true;
             }
         }
         return false;
     }

     public static String getXPath(Node node) {
         StringBuffer xpathFragment = new StringBuffer();
         StringBuffer xpath = new StringBuffer();
         for (; node.getNodeType() != Node.DOCUMENT_NODE; node = node.getParentNode()){
             xpathFragment.setLength(0);
             xpathFragment.append('/').append(node.getNodeName());
             NamedNodeMap attributes = node.getAttributes();
             if (attributes != null) {
                 for (int i = 0; i < attributes.getLength(); ++i) {
                 	Node attribute = attributes.item(i);
                 	xpathFragment.append("[@").append(attribute.getNodeName()).append('=')
 					    .append(attribute.getNodeValue()).append(']');
                 }
             }
             xpath.insert(0, xpathFragment);
         }
         xpath.insert(0, '/');
         return xpath.toString();
     }

     public static String getParent(String locale) {
         int pos = locale.lastIndexOf('_');
         if (pos >= 0) {
             return locale.substring(0,pos);
         }
         if (!locale.equals("root")) return "root";
         return null;
     }


     static class CldrCollations {
         Set validLocales = new TreeSet();
         Map ulocale_rules = new TreeMap(ULocaleComparator);
         Map locale_types_rules = new TreeMap();
         String sourceDir;
         Map collation_collation = new HashMap();
         RuleBasedCollator emptyCollator = (RuleBasedCollator) Collator.getInstance(new ULocale(""));

         public Set getAvailableSet() {
         	return ulocale_rules.keySet();
         }

 		public RuleBasedCollator getInstance(ULocale locale) {
 			return (RuleBasedCollator) ulocale_rules.get(locale);
 		}

 		void show() {
             log.println("Showing Locales");
             log.println("Unique Collators: " + collation_collation.size());
         	for (Iterator it2 = ulocale_rules.keySet().iterator(); it2.hasNext();) {
                 ULocale locale = (ULocale) it2.next();
                 RuleBasedCollator col = (RuleBasedCollator) ulocale_rules.get(locale);
                 log.println("\t" + locale + ", " + col.getRules());
             }
         }

         CldrCollations(String sourceDir, String localeRegex) throws Exception {
         	this.sourceDir = sourceDir;
             Set s = getMatchingXMLFiles(sourceDir, localeRegex);
             for (Iterator it = s.iterator(); it.hasNext();) {
             	getCollationRules((String) it.next());
             }

             // now fixup the validLocales, adding in what they inherit
             // TODO, add check: validSubLocales are masked by intervening locales.
             for (Iterator it = validLocales.iterator(); it.hasNext(); ) {
                 String locale = (String) it.next();
             	Map types_rules = (Map) locale_types_rules.get(locale);
                 if (types_rules != null) log.println("Weird: overlap in validLocales: " + locale);
                 else {
                     for (String parentlocale = getParent(locale); parentlocale != null; locale = getParent(parentlocale)) {
                         types_rules = (Map) locale_types_rules.get(parentlocale);
                         if (types_rules != null) {
                         	locale_types_rules.put(locale, types_rules);
                             break;
                         }
                     }
                 }
             }
             // now generate the @-style locales
             for (Iterator it = locale_types_rules.keySet().iterator(); it.hasNext(); ) {
             	String locale = (String) it.next();
                 Map types_rules = (Map) locale_types_rules.get(locale);
                 for (Iterator it2 = types_rules.keySet().iterator(); it2.hasNext(); ) {
                 	String type = (String) it2.next();
                     RuleBasedCollator col = (RuleBasedCollator) types_rules.get(type);
                     String name = type.equals("standard") ? locale : locale + "@collation=" + type;
                     ulocale_rules.put(new ULocale(name), col);
                 }
             }
         }

         public static String replace(String source, String pattern, String replacement) {
             // dumb code for now
             for (int pos = source.indexOf(pattern, 0); pos >= 0; pos = source.indexOf(pattern, pos + 1)) {
             	source = source.substring(0, pos) + replacement + source.substring(pos+pattern.length());
             }
             return source;
         }
         static Transliterator fromHex = Transliterator.getInstance("hex-any");

         private void getCollationRules(String locale) throws Exception {
             System.out.println(locale);
             Document doc = LDMLUtilities.getFullyResolvedLDML(sourceDir, locale, false, false, false);
             Node node = LDMLUtilities.getNode(doc, "//ldml/collations");
             LDML2ICUConverter cnv = new LDML2ICUConverter();
             StringBuffer stringBuffer = new StringBuffer();
             ICUResourceWriter.ResourceTable resource = (ICUResourceWriter.ResourceTable) cnv.parseCollations(node, stringBuffer);
             Map types_rules = new TreeMap();
             locale_types_rules.put(locale, types_rules);
             for (Resource current = resource.first; current != null; current = current.next) {
                 //System.out.println(current.name);
                 if (current instanceof ICUResourceWriter.ResourceTable) {
                     ICUResourceWriter.ResourceTable table = (ICUResourceWriter.ResourceTable) current;
                     for (Resource current2 = table.first; current2 != null; current2 = current2.next) {
                     	if (current2 instanceof ICUResourceWriter.ResourceString) {
                     		ICUResourceWriter.ResourceString foo = (ICUResourceWriter.ResourceString) current2;
                     		//System.out.println("\t" + foo.name + ", " + foo.val);
                             /* skip since the utilities have the wrong value
                             if (current.name.equals("validSubLocales")) {
                                 // skip since it is wrong
                                 log.println("Valid Sub Locale: " + foo.name);
                             	validLocales.add(foo.name);
                             } else
                             */
                             if (foo.name.equals("Sequence")) {
                                 // remove the \ u's, because they blow up
                                 String rules = fromHex.transliterate(foo.val);
                                 RuleBasedCollator fixed = generateCollator(locale, current.name, foo.name, rules);
                                 if (fixed != null) {
                                     log.println("Rules for: " + locale + "," + current.name);
                                     log.println(rules);
                                     if (!rules.equals(foo.val)) {
                                         log.println("Original Rules from Ram: ");
                                         log.println(foo.val);
                                     }
                                     types_rules.put(current.name, fixed);
                                 }
                             }
                         }
                     }
                 }
                 //current.write(System.out,0,false);
             }
             // now get the valid sublocales
             Document doc2 = LDMLUtilities.parse(sourceDir + locale + ".xml", false);
             Node colls = LDMLUtilities.getNode(doc2,"//ldml/collations");
             String validSubLocales = LDMLUtilities.getAttributeValue(colls, "validSubLocales");
             if (validSubLocales != null) {
                 String items[] = new String[100]; // allocate plenty
                 Utility.split(validSubLocales, ' ', items);
                 for (int i = 0; items[i].length() != 0; ++i) {
                     log.println("Valid Sub Locale: " + items[i]);
                     validLocales.add(items[i]);
                 }
             }
         }

 		/**
 		 * @param locale
 		 * @param current
 		 * @param foo
 		 * @param rules
 		 */
 		private RuleBasedCollator generateCollator(String locale, String current, String foo, String rules) {
             RuleBasedCollator fixed = null;
 			try {
 			    if (rules.equals("")) fixed = emptyCollator;
 			    else {
 			        rules = replace(rules, "[optimize[", "[optimize [");
 			        rules = replace(rules, "[suppressContractions[", "[suppressContractions [");
 			        RuleBasedCollator col = new RuleBasedCollator(rules);
 			        fixed = (RuleBasedCollator) collation_collation.get(col);
 			        if (fixed == null) {
 			        	collation_collation.put(col, col);
 			            fixed = col;
 			        }
 			    }
 			} catch (Exception e) {
 				log.println("***Cannot create collator from: " + locale + ", " + current + ", " + foo + ", " + rules);
 			    e.printStackTrace(log);
 			    RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(new ULocale(locale));
 			    String oldrules = coll.getRules();
 			    log.println("Old ICU4J: " + oldrules);
 			    log.println("Equal?: " + oldrules.equals(rules));
 			}
 			return fixed;
 		}
     }
     // ========== UNICODESET UTILITIES ==========

     public static interface Apply {
     	String apply(String source);
     }
     static UnicodeSet apply(UnicodeSet source, Apply apply) {
         UnicodeSet target = new UnicodeSet();
         for (UnicodeSetIterator usi = new UnicodeSetIterator(source); usi.next(); ) {
             String s = usi.getString();
             target.add(apply.apply(s));
         }
         return target;
     }
     static UnicodeSet nfc(UnicodeSet source) {
         return apply(source, new Apply() {
 			public String apply(String source) {
 				return Normalizer.compose(source, false);
 			}
         });
     }

     public static interface CloseCodePoint {
     	/**
     	 * @param cp code point to get closure for
     	 * @param toAddTo Unicode set for the closure
     	 * @return toAddTo (for chaining)
     	 */
     	UnicodeSet close(int cp, UnicodeSet toAddTo);
     }

     public static UnicodeSet createCaseClosure(UnicodeSet source) {
         UnicodeSet target = new UnicodeSet();
         for (UnicodeSetIterator usi = new UnicodeSetIterator(source); usi.next(); ) {
             String s = usi.getString();
             UnicodeSet temp = createClosure(s, CCCP);
             if (temp == null) target.add(s);
             else target.addAll(temp);
         }
         return target;
     }

     public static final CloseCodePoint CCCP = new CloseCodePoint() {
         Locale locale = Locale.ENGLISH;
         UnicodeSet NONE = new UnicodeSet();
         UnicodeMap map = new UnicodeMap();

 		public UnicodeSet close(int cp, UnicodeSet toAddTo) {
             UnicodeSet result = (UnicodeSet) map.getValue(cp);
             if (result == null) {
     			result = new UnicodeSet();
                 result.add(cp);
                 String s = UCharacter.toLowerCase(locale, UTF16.valueOf(cp));
                 result.add(s);
                 s = UCharacter.toUpperCase(locale, UTF16.valueOf(cp));
                 result.add(s);
                 s = UCharacter.toTitleCase(locale, UTF16.valueOf(cp), null);
                 result.add(s);
                 // special hack
                 if (result.contains("SS")) result.add("sS").add("ss");
                 if (result.size() == 1) result = NONE;
                 map.put(cp, result);
             }
             if (result != NONE) toAddTo.addAll(result);
             else toAddTo.add(cp);
             return toAddTo;
 		}
     };

     public static UnicodeSet createClosure(String source, CloseCodePoint closer) {
     	return createClosure(source, 0, closer);
     }
     public static UnicodeSet createClosure(String source, int position, CloseCodePoint closer) {
         UnicodeSet result = new UnicodeSet();
         // if at end, return empty set
         if (position >= source.length()) return result;
     	int cp = UTF16.charAt(source, position);
         // if last character, return its set
         int endPosition = position + UTF16.getCharCount(cp);
         if (endPosition >= source.length()) return closer.close(cp, result);
         // otherwise concatenate its set with the remainder
         UnicodeSet remainder = createClosure(source, endPosition, closer);
         return createAppend(closer.close(cp, result), remainder);
     }

     /**
      * Produce the result of appending each element of this to each element of other.
      * That is, [a{cd}] + [d{ef}] => [{ad}{aef}{cdd}{cdef}]
      * @param other
      * @return
      */
     public static UnicodeSet createAppend(UnicodeSet a, UnicodeSet b) {
         UnicodeSet target = new UnicodeSet();
         for (UnicodeSetIterator usi = new UnicodeSetIterator(a); usi.next(); ) {
             String s = usi.getString();
             for (UnicodeSetIterator usi2 = new UnicodeSetIterator(b); usi2.next(); ) {
                 String s2 = usi2.getString();
                 target.add(s + s2);
             }
         }
         return target;
     }
 }