001: /**
002: *******************************************************************************
003: * Copyright (C) 1996-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */package com.ibm.icu.text;
007:
008: import java.io.BufferedInputStream;
009: import java.io.ByteArrayInputStream;
010: import java.io.InputStream;
011: import java.io.DataInputStream;
012: import java.io.IOException;
013:
014: import com.ibm.icu.impl.ICUBinary;
015: import com.ibm.icu.impl.ICUData;
016: import com.ibm.icu.impl.ICUResourceBundle;
017: import com.ibm.icu.impl.IntTrie;
018: import com.ibm.icu.lang.UCharacter;
019: import com.ibm.icu.util.VersionInfo;
020: import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
021: import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
022:
/**
 * <p>Internal reader class for the ICU data file ucadata.icu containing
 * Unicode Collation Algorithm data.</p>
 * <p>This class simply reads ucadata.icu, authenticates that it is a valid
 * ICU data file and splits its contents up into blocks of data for use in
 * <a href=Collator.html>com.ibm.icu.text.Collator</a>.
 * </p>
 * <p>ucadata.icu, which is in big-endian format, is packaged in a jar
 * together with this package.</p>
 * @author Syn Wee Quek
 * @since release 2.2, April 18 2002
 * @draft 2.2
 */
036:
037: final class CollatorReader {
038: static char[] read(RuleBasedCollator rbc, UCAConstants ucac)
039: throws IOException {
040: InputStream i = ICUData
041: .getRequiredStream(ICUResourceBundle.ICU_BUNDLE
042: + "/ucadata.icu");
043: BufferedInputStream b = new BufferedInputStream(i, 90000);
044: CollatorReader reader = new CollatorReader(b);
045: char[] result = reader.readImp(rbc, ucac);
046: b.close();
047: return result;
048: }
049:
050: static void initRBC(RuleBasedCollator rbc, byte[] data)
051: throws IOException {
052: final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
053:
054: InputStream i = new ByteArrayInputStream(data);
055: BufferedInputStream b = new BufferedInputStream(i);
056: CollatorReader reader = new CollatorReader(b, false);
057: if (data.length > MIN_BINARY_DATA_SIZE_) {
058: reader.readImp(rbc, null);
059: } else {
060: reader.readHeader(rbc);
061: reader.readOptions(rbc);
062: // duplicating UCA_'s data
063: rbc.setWithUCATables();
064: }
065: }
066:
067: static InverseUCA getInverseUCA() throws IOException {
068: InverseUCA result = null;
069: InputStream i = ICUData
070: .getRequiredStream(ICUResourceBundle.ICU_BUNDLE
071: + "/invuca.icu");
072: // try {
073: // String invdat = "/com/ibm/icu/impl/data/invuca.icu";
074: // InputStream i = CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
075: BufferedInputStream b = new BufferedInputStream(i, 110000);
076: result = CollatorReader.readInverseUCA(b);
077: b.close();
078: i.close();
079: return result;
080: // } catch (Exception e) {
081: // throw new RuntimeException(e.getMessage());
082: // }
083: }
084:
085: // protected constructor ---------------------------------------------
086:
087: /**
088: * <p>Protected constructor.</p>
089: * @param inputStream ICU collator file input stream
090: * @exception IOException throw if data file fails authentication
091: * @draft 2.1
092: */
093: private CollatorReader(InputStream inputStream) throws IOException {
094: this (inputStream, true);
095: /*
096: byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
097: // weiv: check that we have the correct Unicode version in
098: // binary files
099: VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
100: if(UnicodeVersion[0] != UCDVersion.getMajor()
101: || UnicodeVersion[1] != UCDVersion.getMinor()) {
102: throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
103: }
104: m_dataInputStream_ = new DataInputStream(inputStream);
105: */
106: }
107:
108: /**
109: * <p>Protected constructor.</p>
110: * @param inputStream ICU uprops.icu file input stream
111: * @param readICUHeader flag to indicate if the ICU header has to be read
112: * @exception IOException throw if data file fails authentication
113: * @draft 2.1
114: */
115: private CollatorReader(InputStream inputStream,
116: boolean readICUHeader) throws IOException {
117: if (readICUHeader) {
118: byte[] UnicodeVersion = ICUBinary.readHeader(inputStream,
119: DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
120: // weiv: check that we have the correct Unicode version in
121: // binary files
122: VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
123: if (UnicodeVersion[0] != UCDVersion.getMajor()
124: || UnicodeVersion[1] != UCDVersion.getMinor()) {
125: throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
126: }
127: }
128: m_dataInputStream_ = new DataInputStream(inputStream);
129: }
130:
131: // protected methods -------------------------------------------------
132:
133: /**
134: * Read and break up the header stream of data passed in as arguments into
135: * meaningful Collator data.
136: * @param rbc RuleBasedCollator to populate with header information
137: * @exception IOException thrown when there's a data error.
138: */
139: private void readHeader(RuleBasedCollator rbc) throws IOException {
140: m_size_ = m_dataInputStream_.readInt();
141: // all the offsets are in bytes
142: // to get the address add to the header address and cast properly
143: // Default options int options
144: m_headerSize_ = m_dataInputStream_.readInt(); // start of options
145: int readcount = 8; // for size and headersize
146: // structure which holds values for indirect positioning and implicit
147: // ranges
148: int UCAConst = m_dataInputStream_.readInt();
149: readcount += 4;
150: // this one is needed only for UCA, to copy the appropriate
151: // contractions
152: m_dataInputStream_.skip(4);
153: readcount += 4;
154: // reserved for future use
155: m_dataInputStream_.skipBytes(4);
156: readcount += 4;
157: // const uint8_t *mappingPosition;
158: int mapping = m_dataInputStream_.readInt();
159: readcount += 4;
160: // uint32_t *expansion;
161: rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
162: readcount += 4;
163: // UChar *contractionIndex;
164: rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
165: readcount += 4;
166: // uint32_t *contractionCEs;
167: int contractionCE = m_dataInputStream_.readInt();
168: readcount += 4;
169: // needed for various closures int contractionSize
170: int contractionSize = m_dataInputStream_.readInt();
171: readcount += 4;
172: // array of last collation element in expansion
173: int expansionEndCE = m_dataInputStream_.readInt();
174: readcount += 4;
175: // array of maximum expansion size corresponding to the expansion
176: // collation elements with last element in expansionEndCE
177: int expansionEndCEMaxSize = m_dataInputStream_.readInt();
178: readcount += 4;
179: // size of endExpansionCE int expansionEndCESize
180: m_dataInputStream_.skipBytes(4);
181: readcount += 4;
182: // hash table of unsafe code points
183: int unsafe = m_dataInputStream_.readInt();
184: readcount += 4;
185: // hash table of final code points in contractions.
186: int contractionEnd = m_dataInputStream_.readInt();
187: readcount += 4;
188: // int CEcount = m_dataInputStream_.readInt();
189: m_dataInputStream_.skipBytes(4);
190: readcount += 4;
191: // is jamoSpecial
192: rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
193: readcount++;
194: // padding
195: m_dataInputStream_.skipBytes(3);
196: readcount += 3;
197: rbc.m_version_ = readVersion(m_dataInputStream_);
198: readcount += 4;
199: rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
200: readcount += 4;
201: rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
202: readcount += 4;
203: // byte charsetName[] = new byte[32]; // for charset CEs
204: m_dataInputStream_.skipBytes(32);
205: readcount += 32;
206: m_dataInputStream_.skipBytes(56); // for future use
207: readcount += 56;
208: if (m_headerSize_ < readcount) {
209: throw new IOException("Internal Error: Header size error");
210: }
211: m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
212:
213: if (rbc.m_contractionOffset_ == 0) { // contraction can be null
214: rbc.m_contractionOffset_ = mapping;
215: contractionCE = mapping;
216: }
217: m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
218: m_expansionSize_ = rbc.m_contractionOffset_
219: - rbc.m_expansionOffset_;
220: m_contractionIndexSize_ = contractionCE
221: - rbc.m_contractionOffset_;
222: m_contractionCESize_ = mapping - contractionCE;
223: m_trieSize_ = expansionEndCE - mapping;
224: m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
225: m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
226: m_unsafeSize_ = contractionEnd - unsafe;
227: m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
228: // treat it as normal collator first
229: // for normal collator there is no UCA contraction
230: m_contractionEndSize_ = m_size_ - contractionEnd;
231:
232: rbc.m_contractionOffset_ >>= 1; // casting to ints
233: rbc.m_expansionOffset_ >>= 2; // casting to chars
234: }
235:
236: /**
237: * Read and break up the collation options passed in the stream of data
238: * and update the argument Collator with the results
239: * @param rbc RuleBasedCollator to populate
240: * @exception IOException thrown when there's a data error.
241: * @draft 2.2
242: */
243: private void readOptions(RuleBasedCollator rbc) throws IOException {
244: int readcount = 0;
245: rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
246: readcount += 4;
247: rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
248: readcount += 4;
249: rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_
250: .readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
251: readcount += 4;
252: rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
253: readcount += 4;
254: rbc.m_defaultIsCaseLevel_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
255: readcount += 4;
256: int value = m_dataInputStream_.readInt();
257: readcount += 4;
258: if (value == RuleBasedCollator.AttributeValue.ON_) {
259: value = Collator.CANONICAL_DECOMPOSITION;
260: } else {
261: value = Collator.NO_DECOMPOSITION;
262: }
263: rbc.m_defaultDecomposition_ = value;
264: rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
265: readcount += 4;
266: rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
267: readcount += 4;
268: rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_
269: .readInt() == RuleBasedCollator.AttributeValue.ON_);
270: readcount += 4;
271: m_dataInputStream_.skip(60); // reserved for future use
272: readcount += 60;
273: m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
274: if (m_optionSize_ < readcount) {
275: throw new IOException("Internal Error: Option size error");
276: }
277: }
278:
    /**
     * Read and break up the stream of data passed in as arguments into
     * meaningful Collator data.
     * <p>Reads, in order: header, options, expansion table, contraction
     * tables, trie, end-of-expansion tables, unsafe-codepoint table,
     * contraction-end table and — for the UCA only — the UCA constants and
     * UCA contraction characters. The read order mirrors the physical layout
     * of the data image and must not be changed; {@code readcount} tracks
     * the bytes consumed so the total can be checked against the declared
     * image size at the end.</p>
     * @param rbc RuleBasedCollator to populate
     * @param UCAConst object to fill up with UCA constants if we are reading
     * the UCA collator, if not use a null
     * @return UCAContractions array filled up with the UCA contractions if we
     * are reading the UCA collator, null otherwise
     * @exception IOException thrown when there's a data error.
     * @draft 2.2
     */
    private char[] readImp(RuleBasedCollator rbc,
            RuleBasedCollator.UCAConstants UCAConst) throws IOException {
        readHeader(rbc);
        // header size has been checked by readHeader
        int readcount = m_headerSize_;
        // option size has been checked by readOptions
        readOptions(rbc);
        readcount += m_optionSize_;
        // expansion table: size is in bytes, table holds 4-byte CEs
        m_expansionSize_ >>= 2;
        rbc.m_expansion_ = new int[m_expansionSize_];
        for (int i = 0; i < m_expansionSize_; i++) {
            rbc.m_expansion_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionSize_ << 2);
        if (m_contractionIndexSize_ > 0) {
            // contraction index: 2-byte chars
            m_contractionIndexSize_ >>= 1;
            rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
            for (int i = 0; i < m_contractionIndexSize_; i++) {
                rbc.m_contractionIndex_[i] = m_dataInputStream_
                        .readChar();
            }
            readcount += (m_contractionIndexSize_ << 1);
            // contraction CEs: 4-byte ints
            m_contractionCESize_ >>= 2;
            rbc.m_contractionCE_ = new int[m_contractionCESize_];
            for (int i = 0; i < m_contractionCESize_; i++) {
                rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
            }
            readcount += (m_contractionCESize_ << 2);
        }
        // the trie deserializes itself and reports how many bytes it used
        rbc.m_trie_ = new IntTrie(m_dataInputStream_,
                RuleBasedCollator.DataManipulate.getInstance());
        if (!rbc.m_trie_.isLatin1Linear()) {
            throw new IOException("Data corrupted, "
                    + "Collator Tries expected to have linear "
                    + "latin one data arrays");
        }
        readcount += rbc.m_trie_.getSerializedDataSize();
        // last CE of each expansion: 4-byte ints
        m_expansionEndCESize_ >>= 2;
        rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
        for (int i = 0; i < m_expansionEndCESize_; i++) {
            rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
        }
        readcount += (m_expansionEndCESize_ << 2);
        // maximum expansion size per entry in expansionEndCE: single bytes
        rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
        for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
            rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_
                    .readByte();
        }
        readcount += m_expansionEndCEMaxSizeSize_;
        // hash table of unsafe code points: single bytes
        rbc.m_unsafe_ = new byte[m_unsafeSize_];
        for (int i = 0; i < m_unsafeSize_; i++) {
            rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_unsafeSize_;
        if (UCAConst != null) {
            // we are reading the UCA
            // unfortunately the UCA offset in any collator data is not 0 and
            // only refers to the UCA data
            m_contractionEndSize_ -= m_UCAValuesSize_;
        }
        rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];
        for (int i = 0; i < m_contractionEndSize_; i++) {
            rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
        }
        readcount += m_contractionEndSize_;
        if (UCAConst != null) {
            // UCA constants: pairs of ints for each CE boundary, then single
            // ints for the primary range limits; readUCAConstcount tracks
            // their combined size so the trailing contraction characters can
            // be sized from the remainder of the UCA values block
            UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_
                    .readInt();
            int readUCAConstcount = 4;
            UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_
                    .readInt();
            readUCAConstcount += 4;
            // the rest of the UCA values block is the UCA contraction
            // characters (2 bytes each)
            int resultsize = (m_UCAValuesSize_ - readUCAConstcount) >> 1;
            char result[] = new char[resultsize];
            for (int i = 0; i < resultsize; i++) {
                result[i] = m_dataInputStream_.readChar();
            }
            readcount += m_UCAValuesSize_;
            if (readcount != m_size_) {
                throw new IOException(
                        "Internal Error: Data file size error");
            }
            return result;
        }
        if (readcount != m_size_) {
            throw new IOException(
                    "Internal Error: Data file size error");
        }
        return null;
    }
470:
471: /**
472: * Reads in the inverse uca data
473: * @param input input stream with the inverse uca data
474: * @return an object containing the inverse uca data
475: * @exception IOException thrown when error occurs while reading the
476: * inverse uca
477: */
478: private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(
479: InputStream inputStream) throws IOException {
480: byte[] UnicodeVersion = ICUBinary.readHeader(inputStream,
481: INVERSE_UCA_DATA_FORMAT_ID_, INVERSE_UCA_AUTHENTICATE_);
482:
483: // weiv: check that we have the correct Unicode version in
484: // binary files
485: VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
486: if (UnicodeVersion[0] != UCDVersion.getMajor()
487: || UnicodeVersion[1] != UCDVersion.getMinor()) {
488: throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
489: }
490:
491: CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
492: DataInputStream input = new DataInputStream(inputStream);
493: input.readInt(); // bytesize
494: int tablesize = input.readInt(); // in int size
495: int contsize = input.readInt(); // in char size
496: input.readInt(); // table in bytes
497: input.readInt(); // conts in bytes
498: result.m_UCA_version_ = readVersion(input);
499: input.skipBytes(8); // skip padding
500:
501: int size = tablesize * 3; // one column for each strength
502: result.m_table_ = new int[size];
503: result.m_continuations_ = new char[contsize];
504:
505: for (int i = 0; i < size; i++) {
506: result.m_table_[i] = input.readInt();
507: }
508: for (int i = 0; i < contsize; i++) {
509: result.m_continuations_[i] = input.readChar();
510: }
511: input.close();
512: return result;
513: }
514:
515: /**
516: * Reads four bytes from the input and returns a VersionInfo
517: * object. Use it to read different collator versions.
518: * @param input already instantiated DataInputStream, positioned
519: * at the start of four version bytes
520: * @return a ready VersionInfo object
521: * @throws IOException thrown when error occurs while reading
522: * version bytes
523: */
524:
525: protected static VersionInfo readVersion(DataInputStream input)
526: throws IOException {
527: byte[] version = new byte[4];
528: version[0] = input.readByte();
529: version[1] = input.readByte();
530: version[2] = input.readByte();
531: version[3] = input.readByte();
532:
533: VersionInfo result = VersionInfo.getInstance((int) version[0],
534: (int) version[1], (int) version[2], (int) version[3]);
535:
536: return result;
537: }
538:
539: // private inner class -----------------------------------------------
540:
541: // private variables -------------------------------------------------
542:
    /**
     * Authenticate uca data format version.
     */
    private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
        public boolean isDataVersionAcceptable(byte version[]) {
            // accept any data whose major format version matches and whose
            // minor version is at least the one this class was built for
            return version[0] == DATA_FORMAT_VERSION_[0]
                   && version[1] >= DATA_FORMAT_VERSION_[1];
            // Too harsh
            //&& version[1] == DATA_FORMAT_VERSION_[1]
            //&& version[2] == DATA_FORMAT_VERSION_[2]
            //&& version[3] == DATA_FORMAT_VERSION_[3];
        }
    };

    /**
     * Authenticate inverse uca data format version.
     */
    private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0]
                   && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
        }
    };

    /**
     * Data input stream for the collation data file (big-endian).
     */
    private DataInputStream m_dataInputStream_;

    /**
     * File format version and id that this class understands.
     * No guarantees are made if a older version is used
     */
    private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x2,
            (byte) 0x2, (byte) 0x0, (byte) 0x0 };
    // "UCol" in ASCII
    private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55,
            (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
    /**
     * Inverse UCA file format version and id that this class understands.
     * No guarantees are made if a older version is used
     */
    private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = {
            (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
    // "InvC" in ASCII
    private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = {
            (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };
    /**
     * Corrupted error string.
     * NOTE(review): appears unused in this file — candidate for removal if
     * no other code references it.
     */
    private static final String CORRUPTED_DATA_ERROR_ = "Data corrupted in Collation data file";

    /**
     * Wrong unicode version error string
     */
    private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";

    // The size fields below are computed by readHeader from the gaps
    // between consecutive table offsets; readImp later converts some of
    // them from byte counts to element counts via shifts.

    /**
     * Size of expansion table in bytes
     */
    private int m_expansionSize_;
    /**
     * Size of contraction index table in bytes
     */
    private int m_contractionIndexSize_;
    /**
     * Size of contraction table in bytes
     */
    private int m_contractionCESize_;
    /**
     * Size of the Trie in bytes
     */
    private int m_trieSize_;
    /**
     * Size of the table that contains information about collation elements
     * that end with an expansion
     */
    private int m_expansionEndCESize_;
    /**
     * Size of the table that contains information about the maximum size of
     * collation elements that end with a particular expansion CE corresponding
     * to the ones in expansionEndCE
     */
    private int m_expansionEndCEMaxSizeSize_;
    /**
     * Size of the option table that contains information about the collation
     * options
     */
    private int m_optionSize_;
    /**
     * Size of the whole data file minus the ICU header
     */
    private int m_size_;
    /**
     * Size of the collation data header
     */
    private int m_headerSize_;
    /**
     * Size of the table that contains information about the "Unsafe"
     * codepoints
     */
    private int m_unsafeSize_;
    /**
     * Size of the table that contains information about codepoints that end
     * with a contraction
     */
    private int m_contractionEndSize_;
    /**
     * Size of the table that contains UCA contraction information
     */
    private int m_UCAValuesSize_;
652:
653: // private methods ---------------------------------------------------
654:
655: }
|