blob: bf700a7ea94b47fe1d071f68e02626e114130046 [file] [log] [blame]
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Types.java,v $
* $Date: 2005/04/06 08:48:17 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
package com.ibm.text.UCA;
import com.ibm.text.UCD.*;
import com.ibm.text.utility.*;
public interface UCA_Types {
/**
* Version of the UCA tables to use
*/
//private static final String VERSION = "-3.0.1d3"; // ""; // "-2.1.9d7";
//public static final String UCA_BASE = "4.1.0"; // "3.1.1"; // ; // ""; // "-2.1.9d7";
//public static final String VERSION = "-" + UCA_BASE; // + "d6" ""; // "-2.1.9d7";
public static final String ALLFILES = "allkeys"; // null if not there
public static final String BASE_UCA_GEN_DIR = UCD_Types.GEN_DIR + "collation" + "\\";
public static final char LEVEL_SEPARATOR = '\u0000';
/**
* Expanding characters are marked with a exception bit combination
* in the collationElement table.
* This means that they map to more than one CE, which is looked up in
* the expansionTable by index.
*/
static final int EXPANDING_MASK = 0xFFFF0000; // marks expanding range start
/**
* This mask is used to get the index from an EXPANDING exception.
* The contracting characters can also make use of this in a future optimization.
*/
static final int EXCEPTION_INDEX_MASK = 0x0000FFFF;
/**
* Contracting characters are marked with a exception bit combination
* in the collationElement table.
* This means that they are the first character of a contraction, and need
* to be looked up (with following characters) in the contractingTable.<br>
* This isn't a MASK since there is exactly one value.
*/
static final int CONTRACTING = 0xFFFE0000;
static final int UNSUPPORTED_FLAG = 0xFFFD0000;
/**
* Used to composed Hangul and Han characters
*/
static final int NEUTRAL_SECONDARY = 0x20;
static final int NEUTRAL_TERTIARY = 0x02;
/** Enum for alternate handling */
public static final byte SHIFTED = 0, ZEROED = 1, NON_IGNORABLE = 2, SHIFTED_TRIMMED = 3, LAST = 3;
/**
* Used to terminate a list of CEs
*/
public static final int TERMINATOR = 0xFFFFFFFF; // CE that marks end of string
/**
* Any unsupported characters (those not in the UCA data tables)
* are marked with a exception bit combination
* so that they can be treated specially.<br>
* There are at least 34 values, so that we can use a range for surrogates
* However, we do add to the first weight if we have surrogate pairs!
*/
static final int UNSUPPORTED_CJK_BASE = 0xFB40;
static final int UNSUPPORTED_CJK_AB_BASE = 0xFB80;
static final int UNSUPPORTED_OTHER_BASE = 0xFBC0;
static final int UNSUPPORTED_BASE = UNSUPPORTED_CJK_BASE;
static final int UNSUPPORTED_LIMIT = UNSUPPORTED_OTHER_BASE + 0x40;
/**
* Special char value that means failed or terminated
*/
static final char NOT_A_CHAR = '\uFFFF';
/**
* CEType
*/
static final byte NORMAL_CE = 0, CONTRACTING_CE = 1, EXPANDING_CE = 2,
CJK_CE = 3, CJK_AB_CE = 4, HANGUL_CE = 5, UNSUPPORTED_CE = 7,
FIXED_CE = 3;
// SURROGATE_CE = 6,
}