blob: c112ab389b41e797f00b9d642c886bb63d99cc02 [file] [log] [blame]
/*
*******************************************************************************
* Copyright (C) 2002-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/localeconverter/PosixCollationBuilder.java,v $
* $Date: 2002/02/16 03:05:30 $
* $Revision: 1.2 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.tool.localeconverter;
import com.ibm.icu.lang.*;
import com.ibm.icu.text.*;
import java.io.*;
import java.util.*;
/**
 * Collects collation rules (a symbol plus up to {@link #MAX_WEIGHTS} weights)
 * and hands them back in sorted order.
 *
 * <p>Rules are registered with {@link #addRule(String)} or
 * {@link #addRule(CollationRule)}; weights are attached to the most recently
 * added rule with {@link #addWeight(String)}. Collating symbols and
 * contractions are represented internally by single characters that are
 * allocated downwards from {@link #FIRST_WEIGHT_CHAR}. The sorted view is
 * built lazily by {@link #getSortOrder()} and discarded by every mutator.
 */
class PosixCollationBuilder {
    /** Number of weight levels stored for every rule. */
    private static final int MAX_WEIGHTS = 4;
    /** Maximum number of characters kept in a single weight. */
    private static final int MAX_COMPOSITION = 4;
    /**
     * Ordinal given to the next rule that is created. Static, so the counter
     * is shared by every builder instance.
     */
    private static int nextCharNumber = 1;
    /** Two-way table: generated character -> symbol name and symbol name -> generated character. */
    private Hashtable weightSymbols = new Hashtable();
    /** Generated character -> contraction value. */
    private Hashtable contractions = new Hashtable();
    /** Rule value -> {@link CollationRule}. */
    private Hashtable rules = new Hashtable();
    /** Rule that {@link #addWeight(String)} appends to. */
    private CollationRule lastRule = null;
    /** Character map used to translate between symbol names and values. */
    private PosixCharMap map;
    /** Cached sort order; {@code null} whenever it has to be rebuilt. */
    private SortedVector order;
    /** First character handed out by {@link #nextFreeWeightChar()}. */
    private static final int FIRST_WEIGHT_CHAR = 0x0000F7FF;
    /** Next candidate for a generated character; counts downwards. */
    private int nextWeightChar = FIRST_WEIGHT_CHAR;
    private CollationRule ignoreRule; //rule for the collating-symbol IGNORE

    /**
     * A single collation element: its value, its ordinal and the source text
     * of each of its {@link #MAX_WEIGHTS} weights.
     */
    public class CollationRule {
        /** Ordinal assigned at creation; used as the numeric weight of this rule. */
        int charNumber;
        /** The value (character or generated character) this rule describes. */
        String value;
        /** Index of the next weight slot {@link #addWeight(String)} will fill. */
        int nextWeight = 0;
        /** Source text of every weight level; defaults to {@link #value}. */
        String[] weightSource = new String[MAX_WEIGHTS];
        /** Resolved numeric weights, indexed [position][level]; {@code null} until resolved. */
        int weight[][] = null;
        /** Human readable form of the rule as it was entered. */
        StringBuffer source = new StringBuffer();

        /**
         * Creates a rule whose weights all default to the symbol itself.
         *
         * @param symbol value of the new rule
         */
        public CollationRule(String symbol) {
            charNumber = nextCharNumber++;
            value = symbol;
            for (int i = 0; i < MAX_WEIGHTS; i++) {
                weightSource[i] = symbol;
            }
            source.append(map.backmapValue(symbol));
            source.append("\t\t");
        }

        /**
         * Creates the rule describing one position of a multi-character rule.
         * Every weight level is reduced to the single character at
         * {@code composition}; levels shorter than that fall back to
         * {@link #value}. The textual {@link #source} is not copied.
         *
         * @param other       rule to take the component from
         * @param composition zero-based position inside each weight
         */
        private CollationRule(CollationRule other, int composition) {
            charNumber = other.charNumber;
            value = other.value;
            nextWeight = other.nextWeight;
            for (int i = 0; i < MAX_WEIGHTS; i++) {
                String otherWeight = other.weightSource[i];
                if (otherWeight.length() > composition) {
                    weightSource[i] = "" + otherWeight.charAt(composition);
                } else {
                    weightSource[i] = value;
                }
            }
        }

        //HexToUnicodeTransliterator myTranslit = new HexToUnicodeTransliterator("<U###0>");

        /**
         * Stores the next weight of this rule. Weights longer than
         * {@link #MAX_COMPOSITION} characters are truncated with a warning on
         * {@code System.err}; weights beyond {@link #MAX_WEIGHTS} are silently
         * dropped.
         *
         * @param symbol weight text, one character per composition position
         */
        public void addWeight(String symbol) {
            // ReplaceableString tSymbol = new ReplaceableString(symbol);
            // myTranslit.transliterate(tSymbol);
            //limit the size of a single weight
            if (symbol.length() > MAX_COMPOSITION) {
                System.err.println("WARNING: Weights of composition greater than "+MAX_COMPOSITION+" were truncated.");
                symbol = symbol.substring(0, MAX_COMPOSITION);
            }
            //limit the number of weights
            if (nextWeight < MAX_WEIGHTS) {
                if (nextWeight > 0) {
                    source.append(";");
                }
                for (int i = 0; i < symbol.length(); i++) {
                    source.append(map.backmapValue("" + symbol.charAt(i)));
                }
                weightSource[nextWeight++] = symbol;
                // force the numeric weights to be recomputed
                weight = null;
            }
        }

        /**
         * Compares this rule with another one, position by position and level
         * by level.
         *
         * @param other rule to compare with; {@code null} stands for the
         *              IGNORE rule
         * @return {@code -(level+1)} or {@code level+1} for the first level
         *         (zero-based) at which the weights differ, otherwise the
         *         difference of the two sizes
         */
        public int compare(CollationRule other) {
            if (other == null) return compare(ignoreRule);
            resolveWeights();
            other.resolveWeights();
            int compareSize = Math.min(getSize(), other.getSize());
            for (int j = 0; j < compareSize; j++) {
                for (int i = 0; i < MAX_WEIGHTS; i++) {
                    // both operands are non-negative, so the subtraction cannot overflow
                    int diff = weight[j][i] - other.weight[j][i];
                    if (diff < 0) {
                        return -(i+1);
                    } else if (diff > 0) {
                        return i+1;
                    }
                }
            }
            return getSize() - other.getSize();
        }

        /** @return {@code true} when {@link #getSize()} is greater than one */
        public boolean isMultiWeight() {
            return getSize() > 1;
        }

        /**
         * @return the length of the longest weight text among levels
         *         1 .. {@code MAX_WEIGHTS - 1}
         */
        public int getSize() {
            int size = 0;
            // NOTE(review): the loop starts at index 1, so the length of
            // weightSource[0] never contributes - confirm this is intended.
            for (int i = 1; i < weightSource.length; i++) {
                size = Math.max(size, weightSource[i].length());
            }
            return size;
        }

        /**
         * @param ndx zero-based position inside the weights
         * @return a new rule holding only the character at {@code ndx} of
         *         every weight level
         */
        public CollationRule getComponent(int ndx) {
            return new CollationRule(this, ndx);
        }

        /** @return the value this rule was created for */
        public String getValue() {
            return value;
        }

        /**
         * @return the contraction registered for {@link #value}, else the
         *         weight-symbol entry registered for it, else the value itself
         */
        public String getSymbol() {
            String newValue = isContraction();
            if (newValue != null) {
                return newValue;
            } else {
                newValue = isWeightSymbol();
                if (newValue != null) {
                    return newValue;
                } else {
                    return value;
                }
            }
        }

        /** @return the textual form of the rule as it was entered */
        public String getSource() {
            return source.toString();
        }

        /** @return the contraction value registered for {@link #value}, or {@code null} */
        private String isContraction() {
            return (String)contractions.get(value);
        }

        /** @return the weight-symbol entry registered for {@link #value}, or {@code null} */
        private String isWeightSymbol() {
            return (String)weightSymbols.get(value);
        }

        /**
         * Locates this rule (or its first component when it spans several
         * positions) in the sort order and walks backwards to a rule whose
         * size is at most one.
         *
         * @return this rule when the lookup yields index 0, otherwise the
         *         rule at which the backward walk stopped
         */
        public CollationRule seeksToRule() {
            CollationRule comp;
            if (getSize() <= 1) {
                comp = this; //save an object creation
            } else {
                comp = getComponent(0);
            }
            // go through the accessor: the cached vector is null after any mutation
            SortedVector sorted = getSortOrder();
            int ndx = sorted.indexOf(comp);
            if (ndx == 0) {
                return this;
            } else {
                CollationRule exp;
                // NOTE(review): this walk stops before index 0 whereas
                // getExpansion() also visits index 0 - confirm which is intended.
                do {
                    exp = (CollationRule)sorted.elementAt(ndx--);
                } while (ndx > 0 && exp.getSize() > 1);
                return exp;
            }
        }

        /**
         * Builds the expansion text of a multi-position rule: for every
         * position the component is looked up in the sort order, the walk
         * goes backwards to a rule of size at most one, and that rule's
         * symbol is appended.
         *
         * @return the concatenated symbols, or {@code null} when
         *         {@link #getSize()} is at most one
         */
        public String getExpansion() {
            if (getSize() <= 1) {
                return null;
            } else {
                // go through the accessor: the cached vector is null after any mutation
                SortedVector sorted = getSortOrder();
                StringBuffer expansion = new StringBuffer();
                for (int j = 0; j < getSize(); j++) {
                    CollationRule comp = getComponent(j);
                    int ndx = sorted.indexOf(comp);
                    CollationRule exp;
                    do {
                        exp = (CollationRule)sorted.elementAt(ndx--);
                    } while (ndx >= 0 && exp.getSize() > 1);
                    expansion.append(exp.getSymbol());
                }
                return expansion.toString();
            }
        }

        /** @return the same text as {@link #getSource()} */
        public String toString() {
            return source.toString();
            /* resolveWeights();
            StringBuffer buf = new StringBuffer();
            buf.append(charNumber);
            buf.append(' ');
            buf.append(value);
            buf.append(' ');
            buf.append(getSymbol());
            buf.append(' ');
            buf.append((isWeightSymbol() != null)?"W":" ");
            buf.append(' ');
            for (int i = 0; i < MAX_WEIGHTS; i++) {
            buf.append(weightSource[i]);
            buf.append(' ');
            }
            for (int i = 0; i < getSize(); i++) {
            buf.append("[ ");
            for (int j = 0; j < MAX_WEIGHTS; j++) {
            int w = weight[i][j];
            buf.append(w);
            buf.append(' ');
            }
            buf.append(']');
            }
            return buf.toString();
            */
        }

        /**
         * Fills {@link #weight} from {@link #weightSource} if it has not been
         * computed since the last change. Each character of each weight text
         * is translated with {@link PosixCollationBuilder#ordinalityOf(String)};
         * positions a weight text does not reach keep the array default of 0.
         */
        private void resolveWeights() {
            if (weight == null) {
                weight = new int[MAX_COMPOSITION][MAX_WEIGHTS];
                for (int j = 0; j < MAX_WEIGHTS; j++) {
                    String symbol = weightSource[j];
                    if (symbol.length() <= 1) {
                        weight[0][j] = ordinalityOf(symbol);
                    } else {
                        for (int i = 0; i < symbol.length(); i++) {
                            char c = symbol.charAt(i);
                            weight[i][j] = ordinalityOf("" + c);
                        }
                    }
                }
            }
        }
    }

    /**
     * Creates a builder and registers the collating symbol {@code IGNORE} as
     * its first rule.
     *
     * @param map character map used to translate symbols and values
     */
    public PosixCollationBuilder(PosixCharMap map) {
        this.map = map;
        String ignoreSymbol = defineWeightSymbol("IGNORE");
        ignoreRule = new CollationRule(ignoreSymbol);
        rules.put(ignoreSymbol, ignoreRule);
        lastRule = ignoreRule;
        //{{INIT_CONTROLS
        //}}
    }

    /**
     * Allocates a generated character for a collating symbol, maps the symbol
     * to it in the character map and records the pair in both directions.
     *
     * @param symbol name of the collating symbol
     * @return the generated one-character string standing for the symbol
     */
    public String defineWeightSymbol(String symbol) {
        order = null;
        String c = nextFreeWeightChar();
        map.defineMapping(symbol, c);
        weightSymbols.put(c, symbol);
        weightSymbols.put(symbol, c);
        return c;
    }

    /**
     * Allocates a generated character for a contraction, maps the symbol to
     * it in the character map and remembers the contraction's value.
     *
     * @param symbol name of the contraction
     * @param value  text the contraction stands for
     * @return the generated one-character string standing for the contraction
     */
    public String defineContraction(String symbol, String value) {
        order = null;
        String c = nextFreeWeightChar();
        map.defineMapping(symbol, c);
        contractions.put(c, value);
        return c;
    }

    /**
     * Hands out the next generated character, counting downwards and skipping
     * every candidate for which the character map's back-mapping yields
     * something other than the candidate itself.
     *
     * @return a one-character string
     */
    private String nextFreeWeightChar() {
        String result = "";
        String mappedSource;
        do {
            result = "" + (char)nextWeightChar--;
            mappedSource = map.backmapValue(result);
            // compare contents, not references: an equal string must end the search
            // even if backmapValue does not return the very same object
        } while (!result.equals(mappedSource));
        return result;
    }

    /**
     * Looks up the ordinal of a weight symbol.
     *
     * @param symbol value of a registered rule
     * @return the rule's ordinal, or {@code Integer.MAX_VALUE} (after
     *         reporting the symbol on {@code System.err}) when no rule is
     *         registered for it
     */
    public int ordinalityOf(String symbol) {
        // HexToUnicodeTransliterator newTranslit = new HexToUnicodeTransliterator();
        // ReplaceableString tSymbol = new ReplaceableString(symbol);
        // newTranslit.transliterate(tSymbol);
        CollationRule w = (CollationRule)rules.get(symbol);
        if (w != null) {
            return w.charNumber;
        } else {
            System.err.print("ERROR: Weight symbol not found: ");
            for (int i = 0 ; i < symbol.length(); i++) {
                char c = symbol.charAt(i);
                // emit the code unit as \\uXXXX, most significant nibble first;
                // each code unit is followed by a line break
                System.err.print("\\u");
                System.err.print(HEX_DIGIT[(c & 0x0F000) >> 12]);
                System.err.print(HEX_DIGIT[(c & 0x0F00) >> 8]);
                System.err.print(HEX_DIGIT[(c & 0x00F0) >> 4]);
                System.err.println(HEX_DIGIT[(c & 0x000F)]);
            }
            System.err.println(" Weight given maximum possible value.");
            return Integer.MAX_VALUE;
        }
    }

    // HexToUnicodeTransliterator myTranslit = new HexToUnicodeTransliterator("<U###0>");

    /**
     * Registers a new rule and makes it the target of subsequent
     * {@link #addWeight(String)} calls. A symbol longer than one character is
     * reported on {@code System.err} and replaced by a freshly defined
     * collating symbol.
     *
     * @param symbol value of the rule
     */
    public void addRule(String symbol) {
        // ReplaceableString tSymbol = new ReplaceableString(symbol);
        // myTranslit.transliterate(tSymbol);
        if (symbol.length() > 1) {
            System.err.println("WARNING: Undefined element '"+symbol+"'. collating-symbol generated.");
            symbol = defineWeightSymbol(symbol);
        }
        order = null;
        lastRule = new CollationRule(symbol);
        rules.put(symbol, lastRule);
    }

    /**
     * Registers an existing rule under its value and makes it the target of
     * subsequent {@link #addWeight(String)} calls.
     *
     * @param rule rule to register
     */
    public void addRule(CollationRule rule) {
        order = null;
        lastRule = rule;
        rules.put(rule.value, rule);
    }

    /**
     * Appends a weight to the most recently added rule. A weight longer than
     * one character is first translated through the character map's
     * {@code mapKey}.
     *
     * @param weight weight text
     */
    public void addWeight(String weight) {
        if (weight.length() > 1) {
            //check to see if it's a bogus weight symbol.
            weight = map.mapKey(weight);
        }
        order = null;
        lastRule.addWeight(weight);
    }

    /** @return an enumeration over every registered rule, in table order */
    public Enumeration getRules() {
        return rules.elements();
    }

    /**
     * Returns the registered rules in sorted order, building and caching the
     * vector on first use after a change. Rules that are weight symbols sort
     * first and are then removed from the front of the result.
     *
     * @return the cached sorted vector of rules
     */
    public SortedVector getSortOrder() {
        if (order == null) {
            order = new SortedVector(
                new Comparator() {
                    public int compare(final Object i, final Object j) {
                        final CollationRule o1 = (CollationRule)i;
                        final CollationRule o2 = (CollationRule)j;
                        final boolean w1 = o1.isWeightSymbol() != null;
                        final boolean w2 = o2.isWeightSymbol() != null;
                        //sort weights first
                        if (w1 && !w2) {
                            return -1;
                        } else if (!w1 && w2) {
                            return 1;
                        } else {
                            return o1.compare(o2);
                        }
                    }
                }
            );
            order.addElements(rules.elements());
            //remove weight symbols from the list
            int i;
            for (i = 0; i < order.size(); i++) {
                CollationRule r = (CollationRule)order.elementAt(i);
                if (r.isWeightSymbol() == null) {
                    break;
                }
            }
            order.removeElements(0, i);
        }
        return order;
    }

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
                                     '8','9','A','B','C','D','E','F'};
    //{{DECLARE_CONTROLS
    //}}
}