blob: 1acde045e4ed884544748d40279decf8c457f0ce [file] [log] [blame]
* Copyright (C) 2002-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/localeconverter/,v $
* $Date: 2003/09/10 23:36:08 $
* $Revision: 1.3 $
import java.util.*;
class PosixCollationBuilder {
// Number of weight slots kept per rule (size of CollationRule.weightSource).
private static final int MAX_WEIGHTS = 4;
// Longest weight string accepted by CollationRule.addWeight; longer ones are truncated.
private static final int MAX_COMPOSITION = 4;
// Counter handed out to each new CollationRule as its charNumber. It is static,
// so it is shared by every builder instance.
private static int nextCharNumber = 1;
// Holds both directions for weight symbols: generated char -> symbol name and
// symbol name -> generated char (see defineWeightSymbol).
private Hashtable weightSymbols = new Hashtable();
// Generated char -> contraction value (see defineContraction).
private Hashtable contractions = new Hashtable();
// Rule value -> CollationRule.
private Hashtable rules = new Hashtable();
// The rule most recently created or added.
private CollationRule lastRule = null;
// Character map used to define symbol mappings and to map values back.
private PosixCharMap map;
// Cached sort order; reset to null whenever symbols, rules or weights change
// and rebuilt on demand by getSortOrder().
private SortedVector order;
// Starting point for generated weight characters; nextFreeWeightChar counts down from here.
private static int FIRST_WEIGHT_CHAR = 0x0000F7FF;
// Next candidate character for nextFreeWeightChar.
private int nextWeightChar = FIRST_WEIGHT_CHAR;
private CollationRule ignoreRule; //rule for the collating-symbol IGNORE
public class CollationRule {
// Sequence number taken from nextCharNumber; ordinalityOf returns it as the rule's ordinality.
int charNumber;
// The symbol this rule was created for; also the key used in the rules table.
String value;
// Index of the next weightSource slot that addWeight will fill.
int nextWeight = 0;
// One weight string per level; each slot defaults to the rule's own symbol.
String[] weightSource = new String[MAX_WEIGHTS];
// Resolved numeric weights, indexed [position][level]; null means "not resolved yet".
int weight[][] = null;
// Text returned by getSource() and toString().
StringBuffer source = new StringBuffer();
/**
 * Creates a rule for the given symbol and gives it the next character number.
 * Every weight slot initially holds the symbol itself.
 *
 * @param symbol the symbol this rule describes
 */
public CollationRule(String symbol) {
    charNumber = nextCharNumber++;
    value = symbol;
    // weightSource has exactly MAX_WEIGHTS slots, so this fills every level.
    Arrays.fill(weightSource, symbol);
}
/**
 * Creates the one-position component of another rule.
 * For each level the new rule's weight is the single character of the other
 * rule's weight at index {@code composition}; levels whose weight string is
 * too short fall back to the rule's own value.
 *
 * @param other       the rule to take the component from
 * @param composition zero-based character position within each weight string
 */
private CollationRule(CollationRule other, int composition) {
    charNumber = other.charNumber;
    value = other.value;
    nextWeight = other.nextWeight;
    for (int level = 0; level < MAX_WEIGHTS; level++) {
        String fullWeight = other.weightSource[level];
        weightSource[level] = (composition < fullWeight.length())
                ? "" + fullWeight.charAt(composition)
                : value;
    }
}
//HexToUnicodeTransliterator myTranslit = new HexToUnicodeTransliterator("<U###0>");
// Records the next weight of this rule from a (possibly <U####>-escaped) symbol string.
// NOTE(review): statements appear to be missing from this view -- the
// `if (nextWeight > 0)` branch and the `for` loop have no visible bodies.
// Confirm against the complete file before changing anything here.
public void addWeight(String symbol) {
// ReplaceableString tSymbol = new ReplaceableString(symbol);
// myTranslit.transliterate(tSymbol);
//limit the size of a single weight
// Escapes are expanded first so the length check counts real characters.
symbol = unescape(symbol);
if (symbol.length() > MAX_COMPOSITION) {
System.err.println("WARNING: Weights of composition greater than "+MAX_COMPOSITION+" were truncated.");
symbol = symbol.substring(0, MAX_COMPOSITION);
//limit the number of weights
if (nextWeight < MAX_WEIGHTS) {
if (nextWeight > 0) {
for (int i = 0; i < symbol.length(); i++) {
weightSource[nextWeight++] = symbol;
// Discard resolved weights; resolveWeights() only recomputes when this is null.
weight = null;
// Compares this rule's numeric weights with another rule's, position by position.
// Returns -(level+1) or +(level+1) for the first level (0-based) whose weights
// differ, otherwise the difference of the two sizes. A null argument is compared
// as if it were the IGNORE rule.
// NOTE(review): `weight` starts out null and no call to resolveWeights() is
// visible in this method -- confirm the weights are resolved before this runs.
public int compare(CollationRule other) {
if (other == null) return compare(ignoreRule);
// Only positions present in both rules are compared weight by weight.
int compareSize = Math.min(getSize(), other.getSize());
for (int j = 0; j < compareSize; j++) {
for (int i = 0; i < MAX_WEIGHTS; i++) {
int diff = weight[j][i] - other.weight[j][i];
if (diff < 0) {
return -(i+1);
} if (diff > 0) {
return i+1;
// Every shared position matched; fall back to the size difference.
return getSize() - other.getSize();
public boolean isMultiWeight() {
return getSize() > 1;
public int getSize() {
int size = 0;
for (int i = 1; i < weightSource.length; i++) {
size = Math.max(size, weightSource[i].length());
return size;
public CollationRule getComponent(int ndx) {
return new CollationRule(this, ndx);
public String getValue() {
return value;
public String getSymbol() {
String newValue = isContraction();
if (newValue != null) {
return newValue;
} else {
newValue = isWeightSymbol();
if (newValue != null) {
return newValue;
} else {
return value;
public String getSource() {
return source.toString();
private String isContraction() {
return (String)contractions.get(value);
private String isWeightSymbol() {
return (String)weightSymbols.get(value);
public CollationRule seeksToRule() {
CollationRule comp;
if (getSize() <= 1) {
comp = this; //save an object creation
} else {
comp = getComponent(0);
int ndx = order.indexOf(comp);
if (ndx == 0) {
return this;
} else {
CollationRule exp;
do {
exp = (CollationRule)order.elementAt(ndx--);
} while (ndx > 0 && exp.getSize() > 1);
return exp;
// Builds the expansion string for a rule that spans several positions.
// Returns null when getSize() is at most 1.
// NOTE(review): no statement that appends to `expansion` is visible in this
// view, so part of the loop body appears to be missing -- confirm against the
// complete file before changing anything here.
public String getExpansion() {
if (getSize() <= 1) {
return null;
} else {
StringBuffer expansion = new StringBuffer();
for (int j = 0; j < getSize(); j++) {
// Locate the single-position component for position j in the sort order.
CollationRule comp = getComponent(j);
int ndx = order.indexOf(comp);
CollationRule exp;
// Walk backwards from that index until a rule with at most one position is
// found or the start of the order has been passed.
do {
exp = (CollationRule)order.elementAt(ndx--);
} while (ndx >= 0 && exp.getSize() > 1);
return expansion.toString();
public String toString() {
return source.toString();
/* resolveWeights();
StringBuffer buf = new StringBuffer();
buf.append(' ');
buf.append(' ');
buf.append(' ');
buf.append((isWeightSymbol() != null)?"W":" ");
buf.append(' ');
for (int i = 0; i < MAX_WEIGHTS; i++) {
buf.append(' ');
for (int i = 0; i < getSize(); i++) {
buf.append("[ ");
for (int j = 0; j < MAX_WEIGHTS; j++) {
int w = weight[i][j];
buf.append(' ');
return buf.toString();
// Fills the numeric `weight` table from weightSource, but only when it is
// currently null. Values are stored as weight[position][level].
// NOTE(review): the allocation of `weight` is not visible in this view; as
// shown, the stores below would hit a null array -- confirm against the
// complete file.
private void resolveWeights() {
if (weight == null) {
for (int j = 0; j < MAX_WEIGHTS; j++) {
String symbol = weightSource[j];
// A weight of at most one character is looked up whole and stored at position 0.
if (symbol.length() <= 1) {
weight[0][j] = ordinalityOf(symbol);
} else {
// A longer weight contributes one ordinality per character, one position each.
for (int i = 0; i < symbol.length(); i++) {
char c = symbol.charAt(i);
weight[i][j] = ordinalityOf(""+c);
/**
 * Creates a builder that works against the given character map.
 * The collating symbol "IGNORE" is defined immediately, and its rule is
 * registered and becomes the current last rule.
 *
 * @param map character map used to define and look up symbol mappings
 */
public PosixCollationBuilder(PosixCharMap map) {
    // The field must be set before defineWeightSymbol runs, because that method uses it.
    this.map = map;
    String ignoreSymbol = defineWeightSymbol("IGNORE");
    ignoreRule = new CollationRule(ignoreSymbol);
    rules.put(ignoreSymbol, ignoreRule);
    lastRule = ignoreRule;
}
public String defineWeightSymbol(String symbol) {
order = null;
String c = nextFreeWeightChar();
map.defineMapping(symbol, c);
weightSymbols.put(c, symbol);
weightSymbols.put(symbol, c);
return c;
public String defineContraction(String symbol, String value) {
order = null;
String c = nextFreeWeightChar();
map.defineMapping(symbol, c);
contractions.put(c, value);
return c;
private String nextFreeWeightChar() {
String result = "";
String mappedSource;
do {
result = ""+(char)nextWeightChar--;
mappedSource = map.backmapValue(result);
} while (result != mappedSource);
return result;
* unescape a string in the format <U####>
public static String unescape(String src){
StringBuffer result = new StringBuffer();
int maxDig = 4;
if(src == null){
return src;
int srcLen= src.length();
for(int i=0; i<srcLen;i++){
char c = src.charAt(i);
if(c == '<'){
if(srcLen > i+1){
char c2 = src.charAt(++i);
if(c2 == 'U' && (i+maxDig+1)< srcLen){
if( src.charAt(i+maxDig)== '>'){
String subStr = src.substring(i,i+maxDig);
Integer val = Integer.valueOf(subStr,16);
result.append((char) val.intValue());
}catch(NumberFormatException ex){
i += maxDig;
System.err.println("WARNING: The escape sequence is not terminated at " + i +" in string: " + src);
return result.toString();
// Returns the ordinality (charNumber) of the rule registered for a symbol.
// The symbol is unescaped first. When no rule is registered, an error that
// lists each character of the symbol as four hex digits is written to
// System.err and Integer.MAX_VALUE is returned.
public int ordinalityOf(String symbol) {
// HexToUnicodeTransliterator newTranslit = new HexToUnicodeTransliterator();
// ReplaceableString tSymbol = new ReplaceableString(symbol);
// newTranslit.transliterate(tSymbol);
symbol = unescape(symbol);
CollationRule w = (CollationRule)rules.get(symbol);
if (w != null) {
return w.charNumber;
} else {
System.err.print("ERROR: Weight symbol not found: ");
for (int i = 0 ; i < symbol.length(); i++) {
char c = symbol.charAt(i);
// Print the character's four nibbles, most significant first.
System.err.print(HEX_DIGIT[(c & 0x0F000) >> 12]); // HEX_DIGIT works for octal
System.err.print(HEX_DIGIT[(c & 0x0F00) >> 8]); // HEX_DIGIT works for octal
System.err.print(HEX_DIGIT[(c & 0x00F0) >> 4]);
System.err.println(HEX_DIGIT[(c & 0x000F)]);
System.err.println(" Weight given maximum possible value.");
return Integer.MAX_VALUE;
// HexToUnicodeTransliterator myTranslit = new HexToUnicodeTransliterator("<U###0>");
// Creates a rule for a symbol, registers it and makes it the current last rule.
// The symbol is unescaped first. A symbol that is still longer than one
// character is treated as an undefined element: a warning is printed and a
// weight symbol is generated for it, whose character replaces the symbol.
// NOTE(review): closing braces are not visible in this view; the rule creation
// below is presumably outside the `if` -- confirm against the complete file.
public void addRule(String symbol) {
// ReplaceableString tSymbol = new ReplaceableString(symbol);
// myTranslit.transliterate(tSymbol);
symbol = unescape(symbol);
if (symbol.length() > 1) {
System.err.println("WARNING: Undefined element '"+symbol+"'. collating-symbol generated.");
symbol = defineWeightSymbol(symbol);
// The set of rules is changing, so the cached sort order is discarded.
order = null;
lastRule = new CollationRule(symbol);
rules.put(symbol, lastRule);
public void addRule(CollationRule rule) {
order = null;
lastRule = rule;
rules.put(rule.value, rule);
// Accepts a weight for the rule being built and discards the cached sort order.
// A weight longer than one character is first translated through map.mapKey.
// NOTE(review): no statement that passes the weight on to a rule is visible in
// this view, so part of the body appears to be missing -- confirm against the
// complete file.
public void addWeight(String weight) {
if (weight.length() > 1) {
//check to see if it's a bogus weight symbol.
weight = map.mapKey(weight);
order = null;
public Enumeration getRules() {
return rules.elements();
// Returns the sort order of the rules, building and caching it in `order` when
// the cache is null.
// NOTE(review): large parts of this method are not visible in this view -- the
// rest of the comparator and the code that inserts the rules into the vector
// are missing. Confirm against the complete file before changing anything here.
public SortedVector getSortOrder() {
if (order == null) {
order = new SortedVector(
new Comparator() {
public int compare(final Object i, final Object j) {
final CollationRule o1 = (CollationRule)i;
final CollationRule o2 = (CollationRule)j;
if(o1 !=null && o2 != null){
// A rule counts as a weight symbol when its value has an entry in weightSymbols.
final boolean w1 = o1.isWeightSymbol() != null;
final boolean w2 = o2.isWeightSymbol() != null;
//sort weights first
if (w1 && !w2) {
return -1;
} else if (!w1 && w2) {
return 1;
} else {
return -1;
//remove weight symbols from the list
int i;
// Find the first rule that is not a weight symbol; everything before it is removed.
for (i = 0; i < order.size(); i++) {
CollationRule r = (CollationRule)order.elementAt(i);
if (r.isWeightSymbol() == null) {
order.removeElements(0, i);
return order;
static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',