/*
 *******************************************************************************
 * Copyright (C) 1996-2000, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/tools/normalizer/Attic/NormalizerBuilder.java,v $ 
 * $Date: 2000/03/10 04:17:56 $ 
 * $Revision: 1.3 $
 *
 *****************************************************************************************
 */

package com.ibm.tools.normalizer;

import java.io.*;
import java.util.*;
import com.ibm.text.*;
import com.ibm.util.CompactByteArray;
import com.ibm.util.CompactCharArray;

public final class NormalizerBuilder
{
    public static void main(String args[]) throws IOException {
        try {
            NormalizerBuilder foo = new NormalizerBuilder(args);
        } catch (Throwable e) {
            System.err.println(e.getLocalizedMessage());
            e.printStackTrace();
            System.in.read();
        }
    }

    private UInfo uinfo;

    private DecompMap decomps = new DecompMap();

    private DecompMap explodeCompat = new DecompMap();
    private DecompMap explodeOnly   = new DecompMap();

    private CompMap permutedCompositions = new CompMap();
    private CompMap binaryCompositions = new CompMap();

    private CharSet bases = new CharSet();
    private CharSet combining = new CharSet();

    private Map pairExplosions = new HashMap();

    private boolean fVerbose = false;
    private boolean fWriteData = false;
    private boolean fShowSizes = false;
    private boolean fPrompt = false;
    private boolean fJava = true;
    private boolean fCPP = false;

    /**
     * The highest Unicode character that has a canonical decomposition.
     * (i.e. largest char that can result from a primary canonical composition.)
     */
    char largestChar = 0;

    public NormalizerBuilder(String[] args) throws IOException
    {
        // Parse my command line
        for (int i = 0; i < args.length; i++)
        {
            if (args[i].equals("-data")) {
                uinfo = new UInfo(args[++i]);
            }
            else if (args[i].equals("-write")) {
                fWriteData = true;
            }
            else if (args[i].equals("-verbose")) {
                fVerbose = true;
            }
            else if (args[i].equals("-size")) {
                fShowSizes = true;
            }
            else if (args[i].equals("-prompt")) {
                fPrompt = true;
            }
            else if (args[i].equals("-java")) {
                fJava = true;
                fCPP = false;
            }
            else if (args[i].equals("-cpp")) {
                fCPP = true;
                fJava = false;
            }
        }
        if (uinfo == null) {
            uinfo = new UInfo("../src/data/unicode/UnicodeData.txt");
        }

        boolean canonicalOnly = true;

        createDecompositions();

        out("\nGenerating permuted compositions...");

        // Form the list of all the permuted sequences that are canonically
        // equivalent to the canonical decompositions.
        // As a by-product, find out which are not combining character sequences,

        for (char ch = 0; ch < 0xFFFF; ch++) {
            String decomp = decomps.get(ch);

            if (decomp != null) {
                boolean done = false;

                if (!uinfo.getFullDecomposition(ch,true).equals(
                            uinfo.getFullDecomposition(ch,false)))
                {
                    explodeCompat.put(ch, uinfo.getFullDecomposition(ch, false));
                    done = true;
                }
                if (uinfo.hasCanonicalDecomposition(ch) && decomp.length() > 1
                    && !uinfo.isExcludedComposition(ch) && uinfo.isCBS(decomp))
                {
                    if (decomp.length() <= 2) {
                        permutedCompositions.put(decomp, ch);
                    }
                    else {
                        List alternatives = concat(decomp.charAt(0),
                            jumble(decomp.substring(1, decomp.length())));

                        for (int i = 0; i < alternatives.size(); ++i)
                        {
                            String variant = (String)alternatives.get(i);
                            String normalized = uinfo.fixCanonical(variant);

                            if (normalized.equals(decomp)) {
                                permutedCompositions.put(variant, ch);
                            }
                        }
                    }
                    largestChar = ch;
                    done = true;
                }
                if (!done) {
                    explodeOnly.put(ch, decomp);    // Disparaged
                }
            }
        }

        out("\nLargest composed char: " + uinfo.hex(largestChar));

        // Form the binary compositions
        out("\nGenerating pairwise compositions...");

        Iterator list = permutedCompositions.keySet().iterator();
        while (list.hasNext()) {
            String decomp = (String)list.next();
            char ch = permutedCompositions.get(decomp);

            if (decomp.length() > 2) {
                //
                // If this is a composition of more than two characters,
                // see if its initial portion is also a composition.  If so, that lets
                // us build up this composed character iteratively.
                //
                for (int i = decomp.length()-1; i > 1; --i) {
                    String partial = decomp.substring(0,i);
                    char partialMap = permutedCompositions.get(partial);
                    if (partialMap != 0) {
                        decomp = partialMap + decomp.substring(i);
                        break;
                    }
                }
            }
            if (decomp.length() <= 2) {
                binaryCompositions.put(decomp, ch);
            } else {
                //
                // The composition takes more than two characters, and there's
                // no way to build it up from smaller ones.
                //
                if (decomp.equals(uinfo.fixCanonical(decomp)))
                {
                    // If the decomp is in canonical order, we're in trouble,
                    // since that means there's no way to generate this composed
                    // character from its canonically decomposed equivalent.
                    err("No pairwise compose of " + uinfo.hex(decomp) +
                            " > " + uinfo.hex(ch) + " " + uinfo.getName(ch,true) );
                }
                else {
                    // If the decomp is *not* in canonical order, it's not as
                    // bad, since composition will still work as long as
                    warn("No pairwise compose of non-canon " + uinfo.hex(decomp) +
                            " > " + uinfo.hex(ch) + " " + uinfo.getName(ch,true) );
                }
            }

            bases.add(decomp.charAt(0));

            // add to list of all combining characters in composites
            for (int q = 1; q < decomp.length(); ++q) {
                combining.add(decomp.charAt(q));
            }
        }


        // Generate the pairwise explosions, where a composed char + combining char
        // transforms into a different pair of characters, usually because the
        // canonical combining classes are reversed.

        out("\nGenerating exploding pairs....");

        List binaryValues = new ArrayList(binaryCompositions.values());
        Collections.sort(binaryValues);

        for (char addOn = 0; addOn < 0xFFFF; addOn++) {
            if (combining.contains(addOn))
            {
                list = binaryValues.iterator();

                while (list.hasNext()) {
                    MutableChar unichar = (MutableChar)list.next();
                    String chStr = String.valueOf(unichar.value);
                    String source = chStr + addOn;

                    String comp = binaryComposition(source);

                    if (comp.length() == 1) continue; // don't care if combines
                    if (comp.charAt(0) == addOn || comp.charAt(1) == addOn) continue; // rearranges

                    if (!source.equals(comp)) {
                        String decomp = fullDecomposition(source);
                        pairExplosions.put(source,comp);
                        bases.add(unichar);
                    }
                }
            }
        }

        buildDecompData();
        buildComposeData();
        out("Success!");

        if (fPrompt) {
            System.out.println("\nHit any key to continue...");
            System.in.read();
        }
    }

    public String fullDecomposition(String s) {
        return fullDecomposition(s, new StringBuffer()).toString();
    }

    public StringBuffer fullDecomposition(char ch, StringBuffer output) {
        String value = decomps.get(ch);
        if (value == null) {
            bubbleAppend(output, ch);
        }
        else {
            bubbleAppend(output, value);
        }
        return output;
    }

    public StringBuffer fullDecomposition(String s, StringBuffer output) {
        for (int i = 0; i < s.length(); ++i) {
            fullDecomposition(s.charAt(i),output);
        }
        return output;
    }

    public String binaryComposition(String sr) {
        // set up decomposed string, init variables
        StringBuffer output = new StringBuffer();
        StringBuffer decomp = new StringBuffer();

        if (sr.length() == 0) return output.toString();

        // First generate the full decomposition of the input string
        fullDecomposition(sr, decomp);
        int basePosition = 0;
        char base = decomp.charAt(0);
        output.append(base);

        // handle degenerate case--no base character at start
        if (uinfo.getCanonicalClass(base) != 0) {
            // later
        }

        // loop through, composing items with base
        for (int i = 1; i < decomp.length(); ++i) {
            char ch = decomp.charAt(i);
            short can = uinfo.getCanonicalClass(ch);

            char value = binaryCompositions.get(String.valueOf(base) + ch);

            if (value != 0 && noObstructions(output, basePosition, can)) {
                base = value;
                output.setCharAt(basePosition, base);
            } else if (can == 0) {
                basePosition = output.length();
                base = ch;
                output.append(ch);
            } else {
                bubbleAppend(output, ch, can);
            }
        }
        return output.toString();
    }

    public boolean noObstructions(StringBuffer buffer, int pos, short can) {
        for (int j = buffer.length()-1; j > pos; --j) {
            if (can == uinfo.getCanonicalClass(buffer.charAt(j))) {
                return false;
            }
        }
        return true;
    }

    public void bubbleAppend(StringBuffer buffer, char ch, short can) {
        for (int j = buffer.length()-1; j >= 0; --j) {
            if (can >= uinfo.getCanonicalClass(buffer.charAt(j))) {
                buffer.insert(j + 1, ch);
                return;
            }
        }
        buffer.insert(0, ch);
    }

    public void bubbleAppend(StringBuffer buffer, char ch) {
        bubbleAppend(buffer, ch, uinfo.getCanonicalClass(ch));
    }

    public void bubbleAppend(StringBuffer buffer, String s) {
        for (int i = 0; i < s.length(); ++i) {
            bubbleAppend(buffer, s.charAt(i));
        }
    }

    String getDecomposition(char ch) {
        return decomps.get(ch);
    }


    /**
     * Generate a Map of all decompositions in Unicode.
     * The keys in the map are MutableChar objects, one for each character that has a decomposition.
     * The values are String objects containing the full decomposition for the character,
     * in canonical order.
     */
    private void createDecompositions()
    {
        out("\nGenerating Full decompositions...");
        StringBuffer temp = new StringBuffer();

        short compatCount=0, canonCount=0;

        for (char ch = 0; ch < 0xFFFF; ++ch) {
            if (ch >= '\u4E00' && ch <= '\uD7A3') continue; // skip ideos

            short category = uinfo.getCategory(ch);

            if (category == uinfo.UNASSIGNED) continue; //skip reserved
            if (category == uinfo.CONTROL) continue;
            if (category == uinfo.FORMAT) continue;
            if (category == uinfo.PRIVATE_USE) continue;
            if (category == uinfo.SURROGATE) continue;

            boolean canon = uinfo.hasCanonicalDecomposition(ch);

            if (uinfo.hasCanonicalDecomposition(ch)) canonCount++;
            if (uinfo.hasCompatibilityDecomposition(ch)) compatCount++;

            if (canon || uinfo.hasCompatibilityDecomposition(ch)) {
                String decomp = uinfo.getFullDecomposition(ch, canon);
                temp.setLength(0);
                temp.append(decomp);
                uinfo.fixCanonical(temp);

                decomps.put(ch, temp.toString() );
            }
        }
    }

    static List concat(char ch, List a) {
        for (int i = 0; i < a.size(); ++i) {
            a.set(i, ch + (String)a.get(i));
        }
        return a;
    }

    /**
     * Return a list of Strings for all possible permutations of the
     * characters in the input string.
     */
    static List jumble (String source)
    {
        ArrayList result = new ArrayList();
        if (source.length() == 1) {
            result.add(source);
        } else for (int i = 0; i < source.length(); ++i) {
            result.addAll( concat( source.charAt(i),
                                   jumble(source.substring(0,i)
                                          + source.substring(i+1,source.length()))));
        }
        return result;
    }

    static final int STR_INDEX_SHIFT = 2;
    static final int STR_LENGTH_MASK = 0x0003;

    static final int DECOMP_RECURSE = 0x00008000;
    static final int DECOMP_MASK  = 0x00007FFF;

    /**
     * Generate a new "DecompData.java" that contains the CompactArray definitions
     * used in the {@link Normalizer.DECOMPOSE} operation.
     */
    void buildDecompData() throws IOException {

        out("\nGenerating DecompData.java....");
        //
        // For each Unicode character that has a decomposition, we put its
        // fully-decomposed form at the end of the "contents" string, followed
        // by a null, and we put its index in "contents" into the CompactArray.
        // If it does not have a decomposition, we store a bogus index.
        //
        // We do this first for all of the compatibility decompositions, save
        // the index in MAX_COMPAT, and then do it again for the canonical
        // decompositions.  When the array is used later, any character whose
        // decomp has an index greater than MAX_COMPAT is a canonical decomp.
        //
        int canonIndex = 0;
        int compatIndex = 0;

        // Map from Unicode character to replacement string index
        CompactCharArray offsets = new CompactCharArray((char)0);

        // We also need a place to store the replacement strings.  Add a char at
        // the front so that "0" won't be the index of any of the replacement strings.
        StringBuffer replace = new StringBuffer().append("\uffff");

        for (char ch = 0; ch < 0xFFFF; ch++) {
            if (uinfo.hasCompatibilityDecomposition(ch)) {
                compatIndex = putLength(replace, decomps.get(ch), 0);
                offsets.setElementAt(ch, (char)compatIndex);
            }
        }

        // Add the canonical decomps.  Their indices must be > compatIndex.
        for (char ch = 0; ch < 0xFFFF; ch++) {
            if (uinfo.hasCanonicalDecomposition(ch)) {

                if (ch == 0x0f77) {
                    out("0F77: decomps.get() = " + uinfo.hex(decomps.get(ch)));
                    out("0F77: fullDecomp = " + uinfo.hex(uinfo.getFullDecomposition(ch,false)));
                }

                canonIndex = putLength(replace, decomps.get(ch), compatIndex);

                // If this character's full compatibility decomposition is different from
                // its canonical decomp, that means one of the characters in its
                // canonical decomp itself has a compatibility decomp.  To deal with this,
                // we set a bit flag telling the decomposer to recurse on this character.

                if (!uinfo.getFullDecomposition(ch,true).equals(uinfo.getFullDecomposition(ch,false))) {
                    offsets.setElementAt(ch, (char)(canonIndex | DECOMP_RECURSE));
                } else {
                    offsets.setElementAt(ch, (char)canonIndex);
                }
            }
        }

        //
        // Now generate another CompactArray containing the combining class of every
        // character in Unicode
        //
        final byte BASE = 0;
        CompactByteArray canonClasses = new CompactByteArray(BASE);

        for (char ch = 0; ch < 0xFFFF; ch++) {
            short canonClass = uinfo.getCanonicalClass(ch);
            if (canonClass != 0) {
                canonClasses.setElementAt(ch, (byte)canonClass);
            }
        }

        // Finally, write the data out to a compilable Java source file

        if (fJava) {
            writeDecompData(new JavaWriter("../src/com/ibm/text/DecompData"),
                        canonIndex, compatIndex, BASE, offsets, replace, canonClasses);
        }

        if (fCPP) {
            writeDecompData(new CPPWriter("/intlwork/source/collate/CPP/dcmpdata", "DecompData"),
                        canonIndex, compatIndex, BASE, offsets, replace, canonClasses);
        }

        out("Decomp data: MAX_CANONICAL = " + canonIndex + ", MAX_DECOMP = " + compatIndex);

        if (fShowSizes) {
            int offsetSize = offsets.getIndexArray().length * 2 + offsets.getValueArray().length * 2;
            int canonSize = canonClasses.getIndexArray().length * 2 + canonClasses.getValueArray().length;
            int replaceLength = replace.length();

            out("Total runtime size of decomp data is "
                + (offsetSize + canonSize + replaceLength));

            out("  offsets:      " + offsetSize);
            out("  canonClasses: " + canonSize);
            out("  replace:      " + replaceLength);
        }
    }

    void writeDecompData(SourceWriter out, int maxCanon, int maxCompat, short BASE,
                        CompactCharArray offsets, StringBuffer contents,
                        CompactByteArray canonClasses)
    {
        out.write("MAX_CANONICAL",  maxCanon        );
        out.write("MAX_COMPAT",     maxCompat       );
        out.write("DECOMP_MASK",    DECOMP_MASK     );
        out.write("DECOMP_RECURSE", DECOMP_RECURSE  );
        out.write("BASE",           BASE            );
        out.write("offsets",        offsets         );
        out.write("contents",       contents        );
        out.write("canonClass",     canonClasses    );
        out.close();
    }


    //==========================================================================================
    // Methods for generating and writing the composition data
    //
    final int TYPE_MASK   = 0x0007;
    final int INDEX_MASK  = 0xFFF8;
    final int INDEX_SHIFT = 3;

    // MAX_BASES is used to map a 2-diminsional (base,combining) index pair onto a
    // one-dimensional CompactArray.  We could just use baseCount, but making it a power
    // of two allows slightly better compaction.

    final int MAX_BASES   = 1024;   // Product must be <= 64K
    final int MAX_COMBINE = 65536/MAX_BASES;

    final char                // for character types
        IGNORE = 0,
        BASE = 1,
        EXPLODING_BASE = 2,
        COMBINING = 3,
        INITIAL_JAMO = 4,
        MEDIAL_JAMO = 5,
        FINAL_JAMO = 6,
        HANGUL = 7;

    // These variables actually hold the composition data.
    short baseCount = 1;        // Leave 0 as an invalid index
    short combineCount = 1;     // Leave 0 as an invalid index
    short nccCount = 0;
    int   maxCompat = 0;
    int   maxCanon = 0;

    // This array contains types (from the set above) and indices into the "replace"
    // and "actions" arrays
    CompactCharArray lookup = new CompactCharArray(IGNORE);

    // We also need a place to store the strings that result from replacements,
    // explosions, and combinations.  Add a char at the front so that "0" won't
    // be the index of any of the replacement strings.
    StringBuffer replace = new StringBuffer().append(" ");

    // We need to represent each canonical character class as a single bit
    // so that we can OR together a mask of all combining char classes seen
    // Build an array that maps from combining class to bit mask.
    int[] classMap = new int[256];
    int[] typeMask;

    // Build a two-dimensional array of the action to take for each base/combining pair
    CompactCharArray actions = new CompactCharArray((char)0);

    char[] actionIndex;

    /**
     * Generate a new "ComposeData.java" that contains the CompactArray definitions
     * used in the {@link Normalizer.COMPOSE} operation.
     */
    void buildComposeData() throws IOException
    {
       out("\nGenerating ComposeData.java....");

        BitSet usedIndices = new BitSet();
        CharSet explodingBases = new CharSet();

        // Find all characters that are both bases *and* have compatibility
        // decompositions.  These are weird
        for (char ch = 0; ch < 0xFFFF; ch++) {
            if (bases.contains(ch) && uinfo.hasCompatibilityDecomposition(ch)) {
                //
                // Add this character's explosion to the replacement string list.
                // We're going to make sure that its "base index", i.e. the
                // index for it in the actions array, is the same as the
                // explosion's index in the replace string.  This lets
                // us use the same index for the character's two behaviors
                //
                int index = put(replace, explodeCompat.get(ch), 0);

                out(uinfo.hex(ch) + " is base and has compat explosion "
                                  + uinfo.hex(explodeCompat.get(ch)) );

                addChar(lookup, ch, EXPLODING_BASE, index);
                usedIndices.set(index);
                explodingBases.add(ch);
            }
        }

        // First add the base characters to the array.
        // At the same time, compute their indices.
        // Leave an empty base index of 0 as a placeholder for null operations.
        //

        for (char ch = 0; ch < 0xFFFF; ch++)
        {

            if (explodingBases.contains(ch)) {
                continue;
            }

            short cclass = uinfo.getCanonicalClass(ch);

            if (bases.contains(ch)) {
                // Make sure that we don't use a base index that was already used
                // for an exploding base character.
                while (usedIndices.get(baseCount)) {
                    baseCount++;
                }
                // Now add the character to lookup as a base
                addChar(lookup, ch, BASE, baseCount++);
            }
            if (combining.contains(ch)) {
                classMap[cclass] = 1;       // Mark this combining class as being used
                addChar(lookup, ch, COMBINING, combineCount++);
            }

            if (ch >= '\u1100' && ch < '\u1160') {
                addChar(lookup, ch, INITIAL_JAMO, 0);
            }
            if (ch >= '\u1161' && ch < '\u11a6') {
                addChar(lookup, ch, MEDIAL_JAMO, 0);
            }
            if (ch >= '\u11a7' && ch < '\u11fa') {
                addChar(lookup, ch, FINAL_JAMO, 0);
            }
            if (ch >= 0xac00 && ch <= 0xd7a4) {
                addChar(lookup, ch, HANGUL, 0);
            }

            // Add explosions for all compatibility decompositions,
            // including the Jamo --> Conjoining Jamo decomps.
            if (explodeCompat.contains(ch))
            {
                maxCompat = put(replace, explodeCompat.get(ch), 0);
                addExplosion(lookup, ch, maxCompat);
            }
        }

        // Now add the explosions resulting from canonical decompositions
        // These will all have indices greater than "maxCompat" so we can distinguish them.
        //
        for (char ch = 0; ch < 0xFFFF; ch++) {
            short cclass = uinfo.getCanonicalClass(ch);

            if (explodeOnly.contains(ch) && uinfo.hasCanonicalDecomposition(ch)) {
                maxCanon = put(replace, explodeOnly.get(ch), maxCompat);
                addExplosion(lookup, ch, maxCanon);
            }
            else if (!combining.contains(ch) && cclass != 0 && classMap[cclass] != 0) {
                //
                // If a combining character didn't happen to end up in one of
                // the pairwise combinations or explosions we use but still has
                // a combining class that is the same as a character we *do* use,
                // we need to save its class so that we don't combine things "past" it.
                //
                // However, if the character has an explosion we *don't* need it, because
                // we'll never see it, only the results of its explosion.
                //
                addChar(lookup, ch, COMBINING, 0);
                nccCount++;
            }
        }

        // Now run through the combining classes again and assign bitmasks
        // in the same ascending order as the canonical classes
        int maskShift = 0;
        for (int i = 0; i < 256; i++) {
            if (classMap[i] != 0) {
                classMap[i] = (1 << (maskShift++));
            }
        }
        if (maskShift > 32) {
            err(Integer.toString(maskShift) + "combining classes; max is 32");
        }
        out("# of combining classes is " + maskShift);

        out("baseCount=" + baseCount + ", combineCount=" + combineCount
                            + ", nccCount=" + nccCount);

        if (baseCount > MAX_BASES) {
            err(Integer.toString(baseCount) + " bases, limit is " + MAX_BASES);
            err(Integer.toString(combineCount) + " combining chars, limit is " + MAX_COMBINE);
        }

        // Now build the "actions" array that tells what to do when each base /
        // combining pair is seen.
        //
        // First do character pairs that combine into a single character...
        //
        Iterator iter = binaryCompositions.keySet().iterator();
        while (iter.hasNext()) {
            String source = (String)iter.next();
            char ch = binaryCompositions.get(source);

            int baseIndex = lookup.elementAt(source.charAt(0)) >>> INDEX_SHIFT;
            int combiningIndex = lookup.elementAt(source.charAt(1)) >>> INDEX_SHIFT;

            actions.setElementAt((char)(baseIndex + MAX_BASES*combiningIndex), ch);
        }


        //
        // Pair explosions: base/combining pairs that explode into something else
        // We're squeezing the indices for these in between MAX_COMPOSED and 0xFFFF,
        // which means they can't be indexes into the "replace" string; those are too big.
        // Instead they're indexes into the "actionIndex" array, which in turn contains
        // indices in "replace"
        //
        actionIndex = new char[ pairExplosions.size() ];
        short index = 0;

        iter = pairExplosions.keySet().iterator();
        while (iter.hasNext()) {
            String source = (String)iter.next();
            char base = source.charAt(0);
            char combining = source.charAt(1);

            int strIndex = put(replace, (String)pairExplosions.get(source), 0);
            actionIndex[index] = (char)strIndex;

            int baseIndex = lookup.elementAt(base) >>> INDEX_SHIFT;
            int combiningIndex = lookup.elementAt(combining) >>> INDEX_SHIFT;

            actions.setElementAt((char)(baseIndex + MAX_BASES*combiningIndex),
                                 (char)(index + largestChar));
            index++;
        }

        // Fill in the "type mask" array that maps from combining character index
        // to a bit mask representing the canonical combining class
        typeMask = new int[combineCount + nccCount];

        for (char ch = 0; ch < 0xFFFF; ch++) {
            int value = lookup.elementAt(ch);
            int type = value & TYPE_MASK;

            if (type == COMBINING) {
                int ind = value >>> INDEX_SHIFT;
                int cclass = uinfo.getCanonicalClass(ch);
                typeMask[ind] = classMap[cclass];
            }
        }

        if (fJava) {
            writeComposeData(new JavaWriter("../src/com/ibm/text/ComposeData"));
        }
        if (fCPP) {
           writeComposeData(new CPPWriter("/intlwork/source/collate/CPP/compdata", "ComposeData"));
        }

        if (fShowSizes) {
            int lookupSize = lookup.getIndexArray().length * 2 + lookup.getValueArray().length * 2;
            int actionSize = actions.getIndexArray().length * 2 + actions.getValueArray().length * 2;
            int actIndexSize = actionIndex.length * 2;
            int replaceSize = replace.length();
            int typeMaskSize = typeMask.length * 2;

            out("Total runtime size of compose data is "
                + (lookupSize + actionSize + actIndexSize + replaceSize + typeMaskSize));

            out("  lookup:       " + lookupSize);
            out("  actions:      " + actionSize);
            out("  actionIndex:  " + actIndexSize);
            out("  typeMask:     " + typeMaskSize);
            out("  replace:      " + replaceSize);
        }
    }

    void writeComposeData(SourceWriter out) {
        out.write("BASE_COUNT",         baseCount);
        out.write("COMBINING_COUNT",    combineCount);
        out.write("MAX_COMPAT",         maxCompat);
        out.write("MAX_CANON",          maxCanon);

        out.writeHex("MAX_COMPOSED",    largestChar);

        int maxIndex = replace.length();
        out.write("MAX_INDEX",          maxIndex    );
        out.write("INITIAL_JAMO_INDEX", maxIndex + 1);
        out.write("MEDIAL_JAMO_INDEX",  maxIndex + 2);

        out.write("MAX_BASES",          MAX_BASES  );
        out.write("MAX_COMBINE",        MAX_COMBINE);

        out.writeHex("TYPE_MASK",       TYPE_MASK);
        out.write("INDEX_SHIFT",        INDEX_SHIFT);

        // The character types
        out.write("IGNORE",             (int)IGNORE);
        out.write("BASE",               (int)BASE);
        out.write("EXPLODING_BASE",     (int)EXPLODING_BASE);
        out.write("COMBINING",          (int)COMBINING);
        out.write("INITIAL_JAMO",       (int)INITIAL_JAMO);
        out.write("MEDIAL_JAMO",        (int)MEDIAL_JAMO);
        out.write("FINAL_JAMO",         (int)FINAL_JAMO);
        out.write("HANGUL",             (int)HANGUL);

        out.write("lookup",         lookup        );
        out.write("actions",        actions       );
        out.write("actionIndex",    actionIndex   );
        out.write("replace",        replace       );
        out.write("typeMask",       typeMask      );

        out.close();
    }

    void addChar(CompactCharArray lookup, char ch, int type, int index)
    {
        // First make sure it's not already present
        if (lookup.elementAt(ch) != IGNORE)
        {
            char oldValue = lookup.elementAt(ch);
            err(typeName(type) + " char is also "
                  + typeName(oldValue & TYPE_MASK) + ": "
                  + uinfo.hex(ch) + "  " + uinfo.getName(ch,true));
        }
        else if ((index << INDEX_SHIFT) > 65536) {
            err("not enough bits: index " + index + " << INDEX_SHIFT = " + (index << INDEX_SHIFT));
        } else {
            lookup.setElementAt(ch, (char)(type | (index << INDEX_SHIFT)));
        }
    }

    void addExplosion(CompactCharArray lookup, char ch, int index)
    {
        // First make sure it doesn't already have an index
        char oldValue = lookup.elementAt(ch);
        int oldIndex = oldValue >>> INDEX_SHIFT;

        if (oldValue != IGNORE) {
            err("Exploding char is already " + typeName(oldValue & TYPE_MASK)
                             + " (index " + oldIndex + "): "
                             + uinfo.hex(ch) + "  " + uinfo.getName(ch,true));
        }

        if (oldIndex != 0) {
            err("Exploding char is already " + typeName(oldValue & TYPE_MASK)
                             + " (index " + oldIndex + "): "
                             + uinfo.hex(ch) + "  " + uinfo.getName(ch,true));
        }
        else if ((index << INDEX_SHIFT) > 65536) {
            err("not enough bits: index " + index + " << INDEX_SHIFT = " + (index << INDEX_SHIFT));
        } else {
            lookup.setElementAt(ch, (char)((oldValue & ~INDEX_MASK) | (index << INDEX_SHIFT)));
        }
    }

    String typeName(int type) {
        switch (type) {
            case IGNORE:            return "Ignored";
            case BASE:              return "Base";
            case EXPLODING_BASE:    return "Exploding Base";
            case COMBINING:         return "Combining";
            case INITIAL_JAMO:      return "Initial Jamo";
            case MEDIAL_JAMO:       return "Medial Jamo";
            case FINAL_JAMO:        return "Final Jamo";
            case HANGUL:            return "Hangul";
            default:                return "Unknown";
        }
    }


    static final int put(StringBuffer buf, String str, int minIndex)
    {
        str = str + '\u0000';   // Add trailing null

        int index = buf.toString().indexOf(str);
        if (index <= minIndex) {
            index = buf.length();
            buf.append(str);
        }
        return index;
    }

    static final int putLength(StringBuffer buf, String str, int minIndex) {
        int length = str.length();

        if (length >= (1 << STR_INDEX_SHIFT)) {
            // There's no room to store the length in the index, so
            // add a null terminator and use a 0 length to flag this
            str = str + '\u0000';
            length = 0;
        }

        int index = buf.toString().indexOf(str);
        if (index <= minIndex) {
            index = buf.length();
            buf.append(str);
        }
        return (index << STR_INDEX_SHIFT) | length;
    }

    //--------------------------------------------------------------------------------
    // Source file headers
    //

    static final String kCHeader =
         "/*\n"
        +" * (C) Copyright IBM Corp. 1997-1998 - All Rights Reserved\n"
        +" *\n"
        +" * The program is provided 'as is' without any warranty express or\n"
        +" * implied, including the warranty of non-infringement and the implied\n"
        +" * warranties of merchantibility and fitness for a particular purpose.\n"
        +" * IBM will not be liable for any damages suffered by you as a result\n"
        +" * of using the Program. In no event will IBM be liable for any\n"
        +" * special, indirect or consequential damages or lost profits even if\n"
        +" * IBM has been advised of the possibility of their occurrence. IBM\n"
        +" * will not be liable for any third party claims against you.\n"
        +" */\n"
        + "// This class is MACHINE GENERATED.  Run NormalizerBuilder to regenerate.\n"
        +"\n";

    void out(String str) {
        if (fVerbose) System.out.println(str);
    }
    void warn(String str) {
        System.err.println("Warning: " + str);
    }
    void err(String str) {
        System.err.println("ERROR:   " + str);
    }
}

//-----------------------------------------------------------------------------
// Utility classes
//-----------------------------------------------------------------------------

class DecompMap extends HashMap {
    public DecompMap() {
    }

    void put(char ch, String value) {
        put(new MutableChar(ch), value);
    }

    String get(char ch) {
        Object obj = get(probe.set(ch));
        return (obj != null) ? (String)obj : null;
    }

    boolean contains(char ch) {
        return containsKey(probe.set(ch));
    }

    MutableChar probe = new MutableChar(' ');
}

class CompMap extends HashMap {
    public CompMap() {
    }

    void put(String key, char value) {
        put(key, new MutableChar(value));
    }

    char get(String key) {
        Object obj = get((Object)key);
        return (obj != null) ? ((MutableChar)obj).value : 0;
    }
}

class CharSet extends HashSet {
    public CharSet() {
    }

    public void add(char ch) {
        add(new MutableChar(ch));
    }

    public boolean contains(char ch) {
        return contains(probe.set(ch));
    }
    MutableChar probe = new MutableChar(' ');
}

