| /** |
| ******************************************************************************* |
| * Copyright (C) 1996-2011, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ******************************************************************************* |
| */ |
| package com.ibm.icu.text; |
| |
| import java.io.BufferedInputStream; |
| import java.io.DataInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.nio.ByteBuffer; |
| |
| import com.ibm.icu.impl.ICUBinary; |
| import com.ibm.icu.impl.ICUData; |
| import com.ibm.icu.impl.ICUResourceBundle; |
| import com.ibm.icu.impl.IntTrie; |
| import com.ibm.icu.lang.UCharacter; |
| import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA; |
| import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants; |
| import com.ibm.icu.text.RuleBasedCollator.UCAConstants; |
| import com.ibm.icu.util.Output; |
| import com.ibm.icu.util.VersionInfo; |
| |
/**
 * <p>
 * Internal reader class for the ICU data file ucadata.icu, which contains Unicode Collation Algorithm data.
 * </p>
 * <p>
 * This class simply reads ucadata.icu, authenticates that it is a valid ICU data file and splits its contents up
 * into blocks of data for use in <a href=Collator.html>com.ibm.icu.text.Collator</a>.
 * </p>
 * <p>
 * ucadata.icu, which is in big-endian format, is packaged in the same jar as this package.
 * </p>
 *
 * @author Syn Wee Quek
 * @since release 2.2, April 18 2002
 */
| |
| final class CollatorReader { |
| static char[] read(RuleBasedCollator rbc, UCAConstants ucac, |
| LeadByteConstants leadByteConstants, Output<Integer> maxUCAContractionLength) |
| throws IOException { |
| InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu"); |
| BufferedInputStream b = new BufferedInputStream(i, 90000); |
| CollatorReader reader = new CollatorReader(b); |
| char[] ucaContractions = reader.readImp(rbc, ucac, leadByteConstants, maxUCAContractionLength); |
| b.close(); |
| return ucaContractions; |
| } |
| |
| public static InputStream makeByteBufferInputStream(final ByteBuffer buf) { |
| return new InputStream() { |
| public int read() throws IOException { |
| if (!buf.hasRemaining()) { |
| return -1; |
| } |
| return buf.get() & 0xff; |
| } |
| |
| public int read(byte[] bytes, int off, int len) throws IOException { |
| len = Math.min(len, buf.remaining()); |
| buf.get(bytes, off, len); |
| return len; |
| } |
| }; |
| } |
| |
| static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException { |
| final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2; |
| int dataLength = data.remaining(); |
| // TODO: Change the rest of this class to use the ByteBuffer directly, rather than |
| // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader(). |
| // Consider changing ICUBinary to also work with a ByteBuffer. |
| CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false); |
| if (dataLength > MIN_BINARY_DATA_SIZE_) { |
| reader.readImp(rbc, null, null, null); |
| } else { |
| reader.readHeader(rbc, null); |
| reader.readOptions(rbc); |
| // duplicating UCA_'s data |
| rbc.setWithUCATables(); |
| } |
| } |
| |
| static InverseUCA getInverseUCA() throws IOException { |
| InverseUCA result = null; |
| InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu"); |
| // try { |
| // String invdat = "/com/ibm/icu/impl/data/invuca.icu"; |
| // InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat); |
| BufferedInputStream b = new BufferedInputStream(i, 110000); |
| result = CollatorReader.readInverseUCA(b); |
| b.close(); |
| i.close(); |
| return result; |
| // } catch (Exception e) { |
| // throw new RuntimeException(e.getMessage()); |
| // } |
| } |
| |
| // protected constructor --------------------------------------------- |
| |
/**
 * <p>
 * Private constructor for a stream that starts with an ICU header; equivalent to calling the
 * two-argument constructor with header reading switched on.
 * </p>
 *
 * @param inputStream
 *            ICU collator file input stream
 * @exception IOException
 *                thrown if the data file fails authentication
 */
private CollatorReader(InputStream inputStream) throws IOException {
    this(inputStream, true);
}
| |
| /** |
| * <p> |
| * Protected constructor. |
| * </p> |
| * |
| * @param inputStream |
| * ICU uprops.icu file input stream |
| * @param readICUHeader |
| * flag to indicate if the ICU header has to be read |
| * @exception IOException |
| * throw if data file fails authentication |
| */ |
| private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException { |
| if (readICUHeader) { |
| byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_); |
| // weiv: check that we have the correct Unicode version in |
| // binary files |
| VersionInfo UCDVersion = UCharacter.getUnicodeVersion(); |
| if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) { |
| throw new IOException(WRONG_UNICODE_VERSION_ERROR_); |
| } |
| } |
| m_dataInputStream_ = new DataInputStream(inputStream); |
| } |
| |
| // protected methods ------------------------------------------------- |
| |
| /** |
| * Read and break up the header stream of data passed in as arguments into meaningful Collator data. |
| * |
| * @param rbc |
| * RuleBasedCollator to populate with header information |
| * @exception IOException |
| * thrown when there's a data error. |
| */ |
| private void readHeader(RuleBasedCollator rbc, Output<Integer> maxUCAContractionLength) throws IOException { |
| m_size_ = m_dataInputStream_.readInt(); |
| // all the offsets are in bytes |
| // to get the address add to the header address and cast properly |
| // Default options int options |
| m_headerSize_ = m_dataInputStream_.readInt(); // start of options |
| int readcount = 8; // for size and headersize |
| // structure which holds values for indirect positioning and implicit |
| // ranges |
| m_UCAConstOffset_ = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // this one is needed only for UCA, to copy the appropriate |
| // contractions |
| /*int contractionUCACombos =*/ m_dataInputStream_.readInt(); |
| readcount += 4; |
| // reserved for future use |
| m_dataInputStream_.skipBytes(4); |
| readcount += 4; |
| // const uint8_t *mappingPosition; |
| int mapping = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // uint32_t *expansion; |
| rbc.m_expansionOffset_ = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // UChar *contractionIndex; |
| rbc.m_contractionOffset_ = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // uint32_t *contractionCEs; |
| int contractionCE = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // needed for various closures int contractionSize |
| int contractionSize = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // array of last collation element in expansion |
| int expansionEndCE = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // array of maximum expansion size corresponding to the expansion |
| // collation elements with last element in expansionEndCE |
| int expansionEndCEMaxSize = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // size of endExpansionCE int expansionEndCESize |
| /* int endExpansionCECount = */m_dataInputStream_.readInt(); |
| readcount += 4; |
| // hash table of unsafe code points |
| int unsafe = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // hash table of final code points in contractions. |
| int contractionEnd = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // int CEcount = m_dataInputStream_.readInt(); |
| int contractionUCACombosSize = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // is jamoSpecial |
| rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean(); |
| readcount++; |
| // isBigEndian and charSetFamily |
| m_dataInputStream_.skipBytes(2); |
| readcount += 2; |
| int contractionUCACombosWidth = m_dataInputStream_.readByte(); |
| if (maxUCAContractionLength != null) { |
| maxUCAContractionLength.value = contractionUCACombosWidth; |
| } |
| // We want to be able to output this value if it's not 0. |
| assert contractionUCACombosWidth == 0 || maxUCAContractionLength != null; |
| readcount += 1; |
| rbc.m_version_ = readVersion(m_dataInputStream_); |
| readcount += 4; |
| rbc.m_UCA_version_ = readVersion(m_dataInputStream_); |
| readcount += 4; |
| rbc.m_UCD_version_ = readVersion(m_dataInputStream_); |
| readcount += 4; |
| /*VersionInfo formatVersion =*/ readVersion(m_dataInputStream_); |
| readcount += 4; |
| rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt(); |
| readcount += 4; |
| rbc.m_leadByteToScripts = m_dataInputStream_.readInt(); |
| readcount += 4; |
| |
| // byte charsetName[] = new byte[32]; // for charset CEs |
| m_dataInputStream_.skipBytes(32); |
| readcount += 32; |
| |
| m_dataInputStream_.skipBytes(44); // for future use |
| readcount += 44; |
| if (m_headerSize_ < readcount) { |
| // /CLOVER:OFF |
| throw new IOException("Internal Error: Header size error"); |
| // /CLOVER:ON |
| } |
| m_dataInputStream_.skipBytes(m_headerSize_ - readcount); |
| |
| if (rbc.m_contractionOffset_ == 0) { // contraction can be null |
| rbc.m_contractionOffset_ = mapping; |
| contractionCE = mapping; |
| } |
| m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_; |
| m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_; |
| m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_; |
| m_contractionCESize_ = mapping - contractionCE; |
| // m_trieSize_ = expansionEndCE - mapping; |
| m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE; |
| m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize; |
| m_unsafeSize_ = contractionEnd - unsafe; |
| // m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later |
| m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2; |
| |
| // treat it as normal collator first |
| // for normal collator there is no UCA contraction |
| // contractions (UChar[contractionSize] + CE[contractionSize]) |
| m_contractionSize_ = contractionSize * 2 + contractionSize * 4; |
| |
| rbc.m_contractionOffset_ >>= 1; // casting to ints |
| rbc.m_expansionOffset_ >>= 2; // casting to chars |
| } |
| |
| /** |
| * Read and break up the collation options passed in the stream of data and update the argument Collator with the |
| * results |
| * |
| * @param rbc |
| * RuleBasedCollator to populate |
| * @exception IOException |
| * thrown when there's a data error. |
| */ |
| private void readOptions(RuleBasedCollator rbc) throws IOException { |
| int readcount = 0; |
| rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt(); |
| readcount += 4; |
| rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_); |
| readcount += 4; |
| rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_); |
| readcount += 4; |
| rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt(); |
| readcount += 4; |
| // rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() |
| // == RuleBasedCollator.AttributeValue.ON_); |
| int defaultIsCaseLevel = m_dataInputStream_.readInt(); |
| rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_); |
| readcount += 4; |
| int value = m_dataInputStream_.readInt(); |
| readcount += 4; |
| if (value == RuleBasedCollator.AttributeValue.ON_) { |
| value = Collator.CANONICAL_DECOMPOSITION; |
| } else { |
| value = Collator.NO_DECOMPOSITION; |
| } |
| rbc.m_defaultDecomposition_ = value; |
| rbc.m_defaultStrength_ = m_dataInputStream_.readInt(); |
| readcount += 4; |
| rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_); |
| readcount += 4; |
| rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_); |
| readcount += 4; |
| m_dataInputStream_.skip(60); // reserved for future use |
| readcount += 60; |
| m_dataInputStream_.skipBytes(m_optionSize_ - readcount); |
| if (m_optionSize_ < readcount) { |
| // /CLOVER:OFF |
| throw new IOException("Internal Error: Option size error"); |
| // /CLOVER:ON |
| } |
| } |
| |
| /** |
| * Read and break up the stream of data passed in as arguments into meaningful Collator data. |
| * |
| * @param rbc |
| * RuleBasedCollator to populate |
| * @param UCAConst |
| * object to fill up with UCA constants if we are reading the UCA collator, if not use a null |
| * @param leadByteConstants |
| * @return UCAContractions array filled up with the UCA contractions if we are reading the UCA collator |
| * @exception IOException |
| * thrown when there's a data error. |
| */ |
| private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst, |
| RuleBasedCollator.LeadByteConstants leadByteConstants, |
| Output<Integer> maxUCAContractionLength) throws IOException { |
| char ucaContractions[] = null; // return result |
| |
| readHeader(rbc, maxUCAContractionLength); |
| // header size has been checked by readHeader |
| int readcount = m_headerSize_; |
| // option size has been checked by readOptions |
| readOptions(rbc); |
| readcount += m_optionSize_; |
| m_expansionSize_ >>= 2; |
| rbc.m_expansion_ = new int[m_expansionSize_]; |
| for (int i = 0; i < m_expansionSize_; i++) { |
| rbc.m_expansion_[i] = m_dataInputStream_.readInt(); |
| } |
| readcount += (m_expansionSize_ << 2); |
| if (m_contractionIndexSize_ > 0) { |
| m_contractionIndexSize_ >>= 1; |
| rbc.m_contractionIndex_ = new char[m_contractionIndexSize_]; |
| for (int i = 0; i < m_contractionIndexSize_; i++) { |
| rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar(); |
| } |
| readcount += (m_contractionIndexSize_ << 1); |
| m_contractionCESize_ >>= 2; |
| rbc.m_contractionCE_ = new int[m_contractionCESize_]; |
| for (int i = 0; i < m_contractionCESize_; i++) { |
| rbc.m_contractionCE_[i] = m_dataInputStream_.readInt(); |
| } |
| readcount += (m_contractionCESize_ << 2); |
| } |
| rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance()); |
| if (!rbc.m_trie_.isLatin1Linear()) { |
| throw new IOException("Data corrupted, " + "Collator Tries expected to have linear " |
| + "latin one data arrays"); |
| } |
| readcount += rbc.m_trie_.getSerializedDataSize(); |
| m_expansionEndCESize_ >>= 2; |
| rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_]; |
| for (int i = 0; i < m_expansionEndCESize_; i++) { |
| rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt(); |
| } |
| readcount += (m_expansionEndCESize_ << 2); |
| rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_]; |
| for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) { |
| rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte(); |
| } |
| readcount += m_expansionEndCEMaxSizeSize_; |
| rbc.m_unsafe_ = new byte[m_unsafeSize_]; |
| for (int i = 0; i < m_unsafeSize_; i++) { |
| rbc.m_unsafe_[i] = m_dataInputStream_.readByte(); |
| } |
| readcount += m_unsafeSize_; |
| if (UCAConst != null) { |
| // we are reading the UCA |
| // unfortunately the UCA offset in any collator data is not 0 and |
| // only refers to the UCA data |
| // m_contractionSize_ -= m_UCAValuesSize_; |
| m_contractionSize_ = m_UCAConstOffset_ - readcount; |
| } else { |
| m_contractionSize_ = m_size_ - readcount; |
| } |
| rbc.m_contractionEnd_ = new byte[m_contractionSize_]; |
| for (int i = 0; i < m_contractionSize_; i++) { |
| rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte(); |
| } |
| readcount += m_contractionSize_; |
| if (UCAConst != null) { |
| UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt(); |
| int readUCAConstcount = 4; |
| UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt(); |
| readUCAConstcount += 4; |
| |
| readcount += readUCAConstcount; |
| |
| int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2; |
| assert resultsize == m_UCAcontractionSize_ / 2; |
| ucaContractions = new char[resultsize]; |
| for (int i = 0; i < resultsize; i++) { |
| ucaContractions[i] = m_dataInputStream_.readChar(); |
| } |
| readcount += m_UCAcontractionSize_; |
| } |
| |
| if (leadByteConstants != null) { |
| readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount); |
| leadByteConstants.read(m_dataInputStream_); |
| readcount += leadByteConstants.getSerializedDataSize(); |
| } |
| |
| if (readcount != m_size_) { |
| // /CLOVER:OFF |
| throw new IOException("Internal Error: Data file size error"); |
| // /CLOVER:ON |
| } |
| return ucaContractions; |
| } |
| |
| /** |
| * Reads in the inverse uca data |
| * |
| * @param input |
| * input stream with the inverse uca data |
| * @return an object containing the inverse uca data |
| * @exception IOException |
| * thrown when error occurs while reading the inverse uca |
| */ |
| private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException { |
| byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_, |
| INVERSE_UCA_AUTHENTICATE_); |
| |
| // weiv: check that we have the correct Unicode version in |
| // binary files |
| VersionInfo UCDVersion = UCharacter.getUnicodeVersion(); |
| if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) { |
| throw new IOException(WRONG_UNICODE_VERSION_ERROR_); |
| } |
| |
| CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA(); |
| DataInputStream input = new DataInputStream(inputStream); |
| input.readInt(); // bytesize |
| int tablesize = input.readInt(); // in int size |
| int contsize = input.readInt(); // in char size |
| input.readInt(); // table in bytes |
| input.readInt(); // conts in bytes |
| result.m_UCA_version_ = readVersion(input); |
| input.skipBytes(8); // skip padding |
| |
| int size = tablesize * 3; // one column for each strength |
| result.m_table_ = new int[size]; |
| result.m_continuations_ = new char[contsize]; |
| |
| for (int i = 0; i < size; i++) { |
| result.m_table_[i] = input.readInt(); |
| } |
| for (int i = 0; i < contsize; i++) { |
| result.m_continuations_[i] = input.readChar(); |
| } |
| input.close(); |
| return result; |
| } |
| |
| /** |
| * Reads four bytes from the input and returns a VersionInfo object. Use it to read different collator versions. |
| * |
| * @param input |
| * already instantiated DataInputStream, positioned at the start of four version bytes |
| * @return a ready VersionInfo object |
| * @throws IOException |
| * thrown when error occurs while reading version bytes |
| */ |
| |
| protected static VersionInfo readVersion(DataInputStream input) throws IOException { |
| byte[] version = new byte[4]; |
| version[0] = input.readByte(); |
| version[1] = input.readByte(); |
| version[2] = input.readByte(); |
| version[3] = input.readByte(); |
| |
| VersionInfo result = VersionInfo.getInstance((int) version[0], (int) version[1], (int) version[2], |
| (int) version[3]); |
| |
| return result; |
| } |
| |
| // private inner class ----------------------------------------------- |
| |
| // private variables ------------------------------------------------- |
| |
| /** |
| * Authenticate uca data format version |
| */ |
| private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() { |
| public boolean isDataVersionAcceptable(byte version[]) { |
| return version[0] == DATA_FORMAT_VERSION_[0] && version[1] >= DATA_FORMAT_VERSION_[1]; |
| // Too harsh |
| // && version[1] == DATA_FORMAT_VERSION_[1] |
| // && version[2] == DATA_FORMAT_VERSION_[2] |
| // && version[3] == DATA_FORMAT_VERSION_[3]; |
| } |
| }; |
| |
| /** |
| * Authenticate uca data format version |
| */ |
| private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() { |
| public boolean isDataVersionAcceptable(byte version[]) { |
| return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0] |
| && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1]; |
| } |
| }; |
| |
| /** |
| * Data input stream for uca.icu |
| */ |
| private DataInputStream m_dataInputStream_; |
| |
| /** |
| * File format version and id that this class understands. No guarantees are made if a older version is used |
| */ |
| private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 }; |
| private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c }; |
| /** |
| * Inverse UCA file format version and id that this class understands. No guarantees are made if a older version is |
| * used |
| */ |
| private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 }; |
| private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 }; |
| |
| /** |
| * Wrong unicode version error string |
| */ |
| private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version"; |
| |
| /** |
| * Size of expansion table in bytes |
| */ |
| private int m_expansionSize_; |
| /** |
| * Size of contraction index table in bytes |
| */ |
| private int m_contractionIndexSize_; |
| /** |
| * Size of contraction table in bytes |
| */ |
| private int m_contractionCESize_; |
| /* |
| * Size of the Trie in bytes |
| */ |
| // private int m_trieSize_; |
| /** |
| * Size of the table that contains information about collation elements that end with an expansion |
| */ |
| private int m_expansionEndCESize_; |
| /** |
| * Size of the table that contains information about the maximum size of collation elements that end with a |
| * particular expansion CE corresponding to the ones in expansionEndCE |
| */ |
| private int m_expansionEndCEMaxSizeSize_; |
| /** |
| * Size of the option table that contains information about the collation options |
| */ |
| private int m_optionSize_; |
| /** |
| * Size of the whole data file minusing the ICU header |
| */ |
| private int m_size_; |
| /** |
| * Size of the collation data header |
| */ |
| private int m_headerSize_; |
| /** |
| * Size of the table that contains information about the "Unsafe" codepoints |
| */ |
| private int m_unsafeSize_; |
| /** |
| * Size in bytes of the table that contains information about codepoints that ends with a contraction |
| */ |
| private int m_contractionSize_; |
| /** |
| * Size of the table that contains UCA contraction information in bytes |
| */ |
| private int m_UCAcontractionSize_; |
| /** |
| * Offset of the UCA Const |
| */ |
| private int m_UCAConstOffset_; |
| |
| // private methods --------------------------------------------------- |
| |
| } |