001: /*
002: ******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: ******************************************************************************
006: */
007:
008: package com.ibm.icu.impl;
009:
010: import java.io.InputStream;
011: import java.io.DataInputStream;
012: import java.io.IOException;
013: import java.util.Arrays;
014: import com.ibm.icu.text.UTF16;
015:
016: /**
017: * Trie implementation which stores data in int, 32 bits.
018: * @author synwee
019: * @see com.ibm.icu.impl.Trie
020: * @since release 2.1, Jan 01 2002
021: */
022: public class IntTrie extends Trie {
023: // public constructors ---------------------------------------------
024:
025: /**
026: * <p>Creates a new Trie with the settings for the trie data.</p>
027: * <p>Unserialize the 32-bit-aligned input stream and use the data for the
028: * trie.</p>
029: * @param inputStream file input stream to a ICU data file, containing
030: * the trie
031: * @param dataManipulate object which provides methods to parse the char
032: * data
033: * @throws IOException thrown when data reading fails
034: * @draft 2.1
035: */
036: public IntTrie(InputStream inputStream,
037: DataManipulate dataManipulate) throws IOException {
038: super (inputStream, dataManipulate);
039: if (!isIntTrie()) {
040: throw new IllegalArgumentException(
041: "Data given does not belong to a int trie.");
042: }
043: }
044:
045: /**
046: * Make a dummy IntTrie.
047: * A dummy trie is an empty runtime trie, used when a real data trie cannot
048: * be loaded.
049: *
050: * The trie always returns the initialValue,
051: * or the leadUnitValue for lead surrogate code points.
052: * The Latin-1 part is always set up to be linear.
053: *
054: * @param initialValue the initial value that is set for all code points
055: * @param leadUnitValue the value for lead surrogate code _units_ that do not
056: * have associated supplementary data
057: * @param dataManipulate object which provides methods to parse the char data
058: */
059: public IntTrie(int initialValue, int leadUnitValue,
060: DataManipulate dataManipulate) {
061: super (new char[BMP_INDEX_LENGTH + SURROGATE_BLOCK_COUNT],
062: HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
063:
064: int dataLength, latin1Length, i, limit;
065: char block;
066:
067: /* calculate the actual size of the dummy trie data */
068:
069: /* max(Latin-1, block 0) */
070: dataLength = latin1Length = INDEX_STAGE_1_SHIFT_ <= 8 ? 256
071: : DATA_BLOCK_LENGTH;
072: if (leadUnitValue != initialValue) {
073: dataLength += DATA_BLOCK_LENGTH;
074: }
075: m_data_ = new int[dataLength];
076: m_dataLength_ = dataLength;
077:
078: m_initialValue_ = initialValue;
079:
080: /* fill the index and data arrays */
081:
082: /* indexes are preset to 0 (block 0) */
083:
084: /* Latin-1 data */
085: for (i = 0; i < latin1Length; ++i) {
086: m_data_[i] = initialValue;
087: }
088:
089: if (leadUnitValue != initialValue) {
090: /* indexes for lead surrogate code units to the block after Latin-1 */
091: block = (char) (latin1Length >> INDEX_STAGE_2_SHIFT_);
092: i = 0xd800 >> INDEX_STAGE_1_SHIFT_;
093: limit = 0xdc00 >> INDEX_STAGE_1_SHIFT_;
094: for (; i < limit; ++i) {
095: m_index_[i] = block;
096: }
097:
098: /* data for lead surrogate code units */
099: limit = latin1Length + DATA_BLOCK_LENGTH;
100: for (i = latin1Length; i < limit; ++i) {
101: m_data_[i] = leadUnitValue;
102: }
103: }
104: }
105:
106: // public methods --------------------------------------------------
107:
108: /**
109: * Gets the value associated with the codepoint.
110: * If no value is associated with the codepoint, a default value will be
111: * returned.
112: * @param ch codepoint
113: * @return offset to data
114: * @draft 2.1
115: */
116: public final int getCodePointValue(int ch) {
117: int offset;
118:
119: // fastpath for U+0000..U+D7FF
120: if (0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
121: // copy of getRawOffset()
122: offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
123: + (ch & INDEX_STAGE_3_MASK_);
124: return m_data_[offset];
125: }
126:
127: // handle U+D800..U+10FFFF
128: offset = getCodePointOffset(ch);
129: return (offset >= 0) ? m_data_[offset] : m_initialValue_;
130: }
131:
132: /**
133: * Gets the value to the data which this lead surrogate character points
134: * to.
135: * Returned data may contain folding offset information for the next
136: * trailing surrogate character.
137: * This method does not guarantee correct results for trail surrogates.
138: * @param ch lead surrogate character
139: * @return data value
140: * @draft 2.1
141: */
142: public final int getLeadValue(char ch) {
143: return m_data_[getLeadOffset(ch)];
144: }
145:
146: /**
147: * Get the value associated with the BMP code point.
148: * Lead surrogate code points are treated as normal code points, with
149: * unfolded values that may differ from getLeadValue() results.
150: * @param ch the input BMP code point
151: * @return trie data value associated with the BMP codepoint
152: * @draft 2.1
153: */
154: public final int getBMPValue(char ch) {
155: return m_data_[getBMPOffset(ch)];
156: }
157:
158: /**
159: * Get the value associated with a pair of surrogates.
160: * @param lead a lead surrogate
161: * @param trail a trail surrogate
162: * @draft 2.1
163: */
164: public final int getSurrogateValue(char lead, char trail) {
165: if (!UTF16.isLeadSurrogate(lead)
166: || !UTF16.isTrailSurrogate(trail)) {
167: throw new IllegalArgumentException(
168: "Argument characters do not form a supplementary character");
169: }
170: // get fold position for the next trail surrogate
171: int offset = getSurrogateOffset(lead, trail);
172:
173: // get the real data from the folded lead/trail units
174: if (offset > 0) {
175: return m_data_[offset];
176: }
177:
178: // return m_initialValue_ if there is an error
179: return m_initialValue_;
180: }
181:
182: /**
183: * Get a value from a folding offset (from the value of a lead surrogate)
184: * and a trail surrogate.
185: * @param leadvalue the value of a lead surrogate that contains the
186: * folding offset
187: * @param trail surrogate
188: * @return trie data value associated with the trail character
189: * @draft 2.1
190: */
191: public final int getTrailValue(int leadvalue, char trail) {
192: if (m_dataManipulate_ == null) {
193: throw new NullPointerException(
194: "The field DataManipulate in this Trie is null");
195: }
196: int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
197: if (offset > 0) {
198: return m_data_[getRawOffset(offset,
199: (char) (trail & SURROGATE_MASK_))];
200: }
201: return m_initialValue_;
202: }
203:
204: /**
205: * <p>Gets the latin 1 fast path value.</p>
206: * <p>Note this only works if latin 1 characters have their own linear
207: * array.</p>
208: * @param ch latin 1 characters
209: * @return value associated with latin character
210: */
211: public final int getLatin1LinearValue(char ch) {
212: return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch];
213: }
214:
215: /**
216: * Checks if the argument Trie has the same data as this Trie
217: * @param other Trie to check
218: * @return true if the argument Trie has the same data as this Trie, false
219: * otherwise
220: */
221: ///CLOVER:OFF
222: public boolean equals(Object other) {
223: boolean result = super .equals(other);
224: if (result && other instanceof IntTrie) {
225: IntTrie othertrie = (IntTrie) other;
226: if (m_initialValue_ != othertrie.m_initialValue_
227: || !Arrays.equals(m_data_, othertrie.m_data_)) {
228: return false;
229: }
230: return true;
231: }
232: return false;
233: }
234:
235: ///CLOVER:ON
236:
237: // protected methods -----------------------------------------------
238:
239: /**
240: * <p>Parses the input stream and stores its trie content into a index and
241: * data array</p>
242: * @param inputStream data input stream containing trie data
243: * @exception IOException thrown when data reading fails
244: */
245: protected final void unserialize(InputStream inputStream)
246: throws IOException {
247: super .unserialize(inputStream);
248: // one used for initial value
249: m_data_ = new int[m_dataLength_];
250: DataInputStream input = new DataInputStream(inputStream);
251: for (int i = 0; i < m_dataLength_; i++) {
252: m_data_[i] = input.readInt();
253: }
254: m_initialValue_ = m_data_[0];
255: }
256:
257: /**
258: * Gets the offset to the data which the surrogate pair points to.
259: * @param lead lead surrogate
260: * @param trail trailing surrogate
261: * @return offset to data
262: * @draft 2.1
263: */
264: protected final int getSurrogateOffset(char lead, char trail) {
265: if (m_dataManipulate_ == null) {
266: throw new NullPointerException(
267: "The field DataManipulate in this Trie is null");
268: }
269: // get fold position for the next trail surrogate
270: int offset = m_dataManipulate_
271: .getFoldingOffset(getLeadValue(lead));
272:
273: // get the real data from the folded lead/trail units
274: if (offset > 0) {
275: return getRawOffset(offset,
276: (char) (trail & SURROGATE_MASK_));
277: }
278:
279: // return -1 if there is an error, in this case we return the default
280: // value: m_initialValue_
281: return -1;
282: }
283:
284: /**
285: * Gets the value at the argument index.
286: * For use internally in TrieIterator
287: * @param index value at index will be retrieved
288: * @return 32 bit value
289: * @see com.ibm.icu.impl.TrieIterator
290: * @draft 2.1
291: */
292: protected final int getValue(int index) {
293: return m_data_[index];
294: }
295:
296: /**
297: * Gets the default initial value
298: * @return 32 bit value
299: * @draft 2.1
300: */
301: protected final int getInitialValue() {
302: return m_initialValue_;
303: }
304:
305: // package private methods -----------------------------------------
306:
307: /**
308: * Internal constructor for builder use
309: * @param index the index array to be slotted into this trie
310: * @param data the data array to be slotted into this trie
311: * @param initialvalue the initial value for this trie
312: * @param options trie options to use
313: * @param datamanipulate folding implementation
314: */
315: IntTrie(char index[], int data[], int initialvalue, int options,
316: DataManipulate datamanipulate) {
317: super (index, options, datamanipulate);
318: m_data_ = data;
319: m_dataLength_ = m_data_.length;
320: m_initialValue_ = initialvalue;
321: }
322:
323: // private data members --------------------------------------------
324:
325: /**
326: * Default value
327: */
328: private int m_initialValue_;
329: /**
330: * Array of char data
331: */
332: private int m_data_[];
333: }
|