001: /*
002: ******************************************************************************
003: * Copyright (C) 1996-2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: ******************************************************************************
006: */
007:
008: package com.ibm.icu.impl;
009:
010: import java.io.InputStream;
011: import java.io.DataInputStream;
012: import java.io.IOException;
013:
014: import com.ibm.icu.text.UTF16;
015:
016: /**
017: * Trie implementation which stores data in char, 16 bits.
018: * @author synwee
019: * @see com.ibm.icu.impl.Trie
020: * @since release 2.1, Jan 01 2002
021: */
022:
// Note: the block calculations still need to be handled later, since CharTrie
// in ICU4C uses the same index array.
025: public class CharTrie extends Trie {
026: // public constructors ---------------------------------------------
027:
028: /**
029: * <p>Creates a new Trie with the settings for the trie data.</p>
030: * <p>Unserialize the 32-bit-aligned input stream and use the data for the
031: * trie.</p>
032: * @param inputStream file input stream to a ICU data file, containing
033: * the trie
034: * @param dataManipulate object which provides methods to parse the char
035: * data
036: * @throws IOException thrown when data reading fails
037: * @draft 2.1
038: */
039: public CharTrie(InputStream inputStream,
040: DataManipulate dataManipulate) throws IOException {
041: super (inputStream, dataManipulate);
042:
043: if (!isCharTrie()) {
044: throw new IllegalArgumentException(
045: "Data given does not belong to a char trie.");
046: }
047: m_friendAgent_ = new FriendAgent();
048: }
049:
050: /**
051: * Make a dummy CharTrie.
052: * A dummy trie is an empty runtime trie, used when a real data trie cannot
053: * be loaded.
054: *
055: * The trie always returns the initialValue,
056: * or the leadUnitValue for lead surrogate code points.
057: * The Latin-1 part is always set up to be linear.
058: *
059: * @param initialValue the initial value that is set for all code points
060: * @param leadUnitValue the value for lead surrogate code _units_ that do not
061: * have associated supplementary data
062: * @param dataManipulate object which provides methods to parse the char data
063: */
064: public CharTrie(int initialValue, int leadUnitValue,
065: DataManipulate dataManipulate) {
066: super (new char[BMP_INDEX_LENGTH + SURROGATE_BLOCK_COUNT],
067: HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
068:
069: int dataLength, latin1Length, i, limit;
070: char block;
071:
072: /* calculate the actual size of the dummy trie data */
073:
074: /* max(Latin-1, block 0) */
075: dataLength = latin1Length = INDEX_STAGE_1_SHIFT_ <= 8 ? 256
076: : DATA_BLOCK_LENGTH;
077: if (leadUnitValue != initialValue) {
078: dataLength += DATA_BLOCK_LENGTH;
079: }
080: m_data_ = new char[dataLength];
081: m_dataLength_ = dataLength;
082:
083: m_initialValue_ = (char) initialValue;
084:
085: /* fill the index and data arrays */
086:
087: /* indexes are preset to 0 (block 0) */
088:
089: /* Latin-1 data */
090: for (i = 0; i < latin1Length; ++i) {
091: m_data_[i] = (char) initialValue;
092: }
093:
094: if (leadUnitValue != initialValue) {
095: /* indexes for lead surrogate code units to the block after Latin-1 */
096: block = (char) (latin1Length >> INDEX_STAGE_2_SHIFT_);
097: i = 0xd800 >> INDEX_STAGE_1_SHIFT_;
098: limit = 0xdc00 >> INDEX_STAGE_1_SHIFT_;
099: for (; i < limit; ++i) {
100: m_index_[i] = block;
101: }
102:
103: /* data for lead surrogate code units */
104: limit = latin1Length + DATA_BLOCK_LENGTH;
105: for (i = latin1Length; i < limit; ++i) {
106: m_data_[i] = (char) leadUnitValue;
107: }
108: }
109:
110: m_friendAgent_ = new FriendAgent();
111: }
112:
113: /**
114: * Java friend implementation
115: */
116: public class FriendAgent {
117: /**
118: * Gives out the index array of the trie
119: * @return index array of trie
120: */
121: public char[] getPrivateIndex() {
122: return m_index_;
123: }
124:
125: /**
126: * Gives out the data array of the trie
127: * @return data array of trie
128: */
129: public char[] getPrivateData() {
130: return m_data_;
131: }
132:
133: /**
134: * Gives out the data offset in the trie
135: * @return data offset in the trie
136: */
137: public int getPrivateInitialValue() {
138: return m_initialValue_;
139: }
140: }
141:
142: // public methods --------------------------------------------------
143:
144: /**
145: * Java friend implementation
146: * To store the index and data array into the argument.
147: * @param friend java friend UCharacterProperty object to store the array
148: */
149: public void putIndexData(UCharacterProperty friend) {
150: friend.setIndexData(m_friendAgent_);
151: }
152:
153: /**
154: * Gets the value associated with the codepoint.
155: * If no value is associated with the codepoint, a default value will be
156: * returned.
157: * @param ch codepoint
158: * @return offset to data
159: * @draft 2.1
160: */
161: public final char getCodePointValue(int ch) {
162: int offset;
163:
164: // fastpath for U+0000..U+D7FF
165: if (0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
166: // copy of getRawOffset()
167: offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
168: + (ch & INDEX_STAGE_3_MASK_);
169: return m_data_[offset];
170: }
171:
172: // handle U+D800..U+10FFFF
173: offset = getCodePointOffset(ch);
174:
175: // return -1 if there is an error, in this case we return the default
176: // value: m_initialValue_
177: return (offset >= 0) ? m_data_[offset] : m_initialValue_;
178: }
179:
180: /**
181: * Gets the value to the data which this lead surrogate character points
182: * to.
183: * Returned data may contain folding offset information for the next
184: * trailing surrogate character.
185: * This method does not guarantee correct results for trail surrogates.
186: * @param ch lead surrogate character
187: * @return data value
188: * @draft 2.1
189: */
190: public final char getLeadValue(char ch) {
191: return m_data_[getLeadOffset(ch)];
192: }
193:
194: /**
195: * Get the value associated with the BMP code point.
196: * Lead surrogate code points are treated as normal code points, with
197: * unfolded values that may differ from getLeadValue() results.
198: * @param ch the input BMP code point
199: * @return trie data value associated with the BMP codepoint
200: * @draft 2.1
201: */
202: public final char getBMPValue(char ch) {
203: return m_data_[getBMPOffset(ch)];
204: }
205:
206: /**
207: * Get the value associated with a pair of surrogates.
208: * @param lead a lead surrogate
209: * @param trail a trail surrogate
210: * @draft 2.1
211: */
212: public final char getSurrogateValue(char lead, char trail) {
213: int offset = getSurrogateOffset(lead, trail);
214: if (offset > 0) {
215: return m_data_[offset];
216: }
217: return m_initialValue_;
218: }
219:
220: /**
221: * <p>Get a value from a folding offset (from the value of a lead surrogate)
222: * and a trail surrogate.</p>
223: * <p>If the
224: * @param leadvalue value associated with the lead surrogate which contains
225: * the folding offset
226: * @param trail surrogate
227: * @return trie data value associated with the trail character
228: * @draft 2.1
229: */
230: public final char getTrailValue(int leadvalue, char trail) {
231: if (m_dataManipulate_ == null) {
232: throw new NullPointerException(
233: "The field DataManipulate in this Trie is null");
234: }
235: int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
236: if (offset > 0) {
237: return m_data_[getRawOffset(offset,
238: (char) (trail & SURROGATE_MASK_))];
239: }
240: return m_initialValue_;
241: }
242:
243: /**
244: * <p>Gets the latin 1 fast path value.</p>
245: * <p>Note this only works if latin 1 characters have their own linear
246: * array.</p>
247: * @param ch latin 1 characters
248: * @return value associated with latin character
249: */
250: public final char getLatin1LinearValue(char ch) {
251: return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch];
252: }
253:
254: /**
255: * Checks if the argument Trie has the same data as this Trie
256: * @param other Trie to check
257: * @return true if the argument Trie has the same data as this Trie, false
258: * otherwise
259: */
260: ///CLOVER:OFF
261: public boolean equals(Object other) {
262: boolean result = super .equals(other);
263: if (result && other instanceof CharTrie) {
264: CharTrie othertrie = (CharTrie) other;
265: return m_initialValue_ == othertrie.m_initialValue_;
266: }
267: return false;
268: }
269:
270: ///CLOVER:ON
271:
272: // protected methods -----------------------------------------------
273:
274: /**
275: * <p>Parses the input stream and stores its trie content into a index and
276: * data array</p>
277: * @param inputStream data input stream containing trie data
278: * @exception IOException thrown when data reading fails
279: */
280: protected final void unserialize(InputStream inputStream)
281: throws IOException {
282: DataInputStream input = new DataInputStream(inputStream);
283: int indexDataLength = m_dataOffset_ + m_dataLength_;
284: m_index_ = new char[indexDataLength];
285: for (int i = 0; i < indexDataLength; i++) {
286: m_index_[i] = input.readChar();
287: }
288: m_data_ = m_index_;
289: m_initialValue_ = m_data_[m_dataOffset_];
290: }
291:
292: /**
293: * Gets the offset to the data which the surrogate pair points to.
294: * @param lead lead surrogate
295: * @param trail trailing surrogate
296: * @return offset to data
297: * @draft 2.1
298: */
299: protected final int getSurrogateOffset(char lead, char trail) {
300: if (m_dataManipulate_ == null) {
301: throw new NullPointerException(
302: "The field DataManipulate in this Trie is null");
303: }
304:
305: // get fold position for the next trail surrogate
306: int offset = m_dataManipulate_
307: .getFoldingOffset(getLeadValue(lead));
308:
309: // get the real data from the folded lead/trail units
310: if (offset > 0) {
311: return getRawOffset(offset,
312: (char) (trail & SURROGATE_MASK_));
313: }
314:
315: // return -1 if there is an error, in this case we return the default
316: // value: m_initialValue_
317: return -1;
318: }
319:
320: /**
321: * Gets the value at the argument index.
322: * For use internally in TrieIterator.
323: * @param index value at index will be retrieved
324: * @return 32 bit value
325: * @see com.ibm.icu.impl.TrieIterator
326: * @draft 2.1
327: */
328: protected final int getValue(int index) {
329: return m_data_[index];
330: }
331:
332: /**
333: * Gets the default initial value
334: * @return 32 bit value
335: * @draft 2.1
336: */
337: protected final int getInitialValue() {
338: return m_initialValue_;
339: }
340:
341: // private data members --------------------------------------------
342:
343: /**
344: * Default value
345: */
346: private char m_initialValue_;
347: /**
348: * Array of char data
349: */
350: private char m_data_[];
351: /**
352: * Agent for friends
353: */
354: private FriendAgent m_friendAgent_;
355: }
|