001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.util;
008:
009: import com.ibm.icu.impl.Utility;
010:
011: /**
012: * class CompactATypeArray : use only on primitive data types
013: * Provides a compact way to store information that is indexed by Unicode
014: * values, such as character properties, types, keyboard values, etc.This
015: * is very useful when you have a block of Unicode data that contains
016: * significant values while the rest of the Unicode data is unused in the
017: * application or when you have a lot of redundance, such as where all 21,000
018: * Han ideographs have the same value. However, lookup is much faster than a
019: * hash table.
020: * A compact array of any primitive data type serves two purposes:
021: * <UL type = round>
022: * <LI>Fast access of the indexed values.
023: * <LI>Smaller memory footprint.
024: * </UL>
025: * A compact array is composed of a index array and value array. The index
026: * array contains the indicies of Unicode characters to the value array.
027: *
028: * @see CompactCharArray
029: * @author Helena Shih
030: * @internal
031: * @deprecated This API is ICU internal only.
032: */
033: public final class CompactByteArray implements Cloneable {
034:
035: /**
036: * The total number of Unicode characters.
037: * @internal
038: * @deprecated This API is ICU internal only.
039: */
040: public static final int UNICODECOUNT = 65536;
041:
042: /**
043: * Default constructor for CompactByteArray, the default value of the
044: * compact array is 0.
045: * @internal
046: * @deprecated This API is ICU internal only.
047: */
048: public CompactByteArray() {
049: this ((byte) 0);
050: }
051:
052: /**
053: * Constructor for CompactByteArray.
054: * @param defaultValue the default value of the compact array.
055: * @internal
056: * @deprecated This API is ICU internal only.
057: */
058: public CompactByteArray(byte defaultValue) {
059: int i;
060: values = new byte[UNICODECOUNT];
061: indices = new char[INDEXCOUNT];
062: hashes = new int[INDEXCOUNT];
063: for (i = 0; i < UNICODECOUNT; ++i) {
064: values[i] = defaultValue;
065: }
066: for (i = 0; i < INDEXCOUNT; ++i) {
067: indices[i] = (char) (i << BLOCKSHIFT);
068: hashes[i] = 0;
069: }
070: isCompact = false;
071:
072: this .defaultValue = defaultValue;
073: }
074:
075: /**
076: * Constructor for CompactByteArray.
077: * @param indexArray the indicies of the compact array.
078: * @param newValues the values of the compact array.
079: * @exception IllegalArgumentException If the index is out of range.
080: * @internal
081: * @deprecated This API is ICU internal only.
082: */
083: public CompactByteArray(char indexArray[], byte newValues[]) {
084: int i;
085: if (indexArray.length != INDEXCOUNT)
086: throw new IllegalArgumentException("Index out of bounds.");
087: for (i = 0; i < INDEXCOUNT; ++i) {
088: char index = indexArray[i];
089: if ((index < 0) || (index >= newValues.length + BLOCKCOUNT))
090: throw new IllegalArgumentException(
091: "Index out of bounds.");
092: }
093: indices = indexArray;
094: values = newValues;
095: isCompact = true;
096: }
097:
098: /**
099: * Constructor for CompactByteArray.
100: *
101: * @param indexArray the RLE-encoded indicies of the compact array.
102: * @param valueArray the RLE-encoded values of the compact array.
103: *
104: * @throws IllegalArgumentException if the index or value array is
105: * the wrong size.
106: * @internal
107: * @deprecated This API is ICU internal only.
108: */
109: public CompactByteArray(String indexArray, String valueArray) {
110: this (Utility.RLEStringToCharArray(indexArray), Utility
111: .RLEStringToByteArray(valueArray));
112: }
113:
114: /**
115: * Get the mapped value of a Unicode character.
116: * @param index the character to get the mapped value with
117: * @return the mapped value of the given character
118: * @internal
119: * @deprecated This API is ICU internal only.
120: */
121: public byte elementAt(char index) {
122: return (values[(indices[index >> BLOCKSHIFT] & 0xFFFF)
123: + (index & BLOCKMASK)]);
124: }
125:
126: /**
127: * Set a new value for a Unicode character.
128: * Set automatically expands the array if it is compacted.
129: * @param index the character to set the mapped value with
130: * @param value the new mapped value
131: * @internal
132: * @deprecated This API is ICU internal only.
133: */
134: public void setElementAt(char index, byte value) {
135: if (isCompact)
136: expand();
137: values[(int) index] = value;
138: touchBlock(index >> BLOCKSHIFT, value);
139: }
140:
141: /**
142: * Set new values for a range of Unicode character.
143: *
144: * @param start the starting offset of the range
145: * @param end the ending offset of the range
146: * @param value the new mapped value
147: * @internal
148: * @deprecated This API is ICU internal only.
149: */
150: public void setElementAt(char start, char end, byte value) {
151: int i;
152: if (isCompact) {
153: expand();
154: }
155: for (i = start; i <= end; ++i) {
156: values[i] = value;
157: touchBlock(i >> BLOCKSHIFT, value);
158: }
159: }
160:
161: /**
162: * Compact the array.
163: * @internal
164: * @deprecated This API is ICU internal only.
165: */
166: public void compact() {
167: compact(false);
168: }
169:
170: /**
171: * Compact the array.
172: * @internal
173: * @deprecated This API is ICU internal only.
174: */
175: public void compact(boolean exhaustive) {
176: if (!isCompact) {
177: int limitCompacted = 0;
178: int iBlockStart = 0;
179: char iUntouched = 0xFFFF;
180:
181: for (int i = 0; i < indices.length; ++i, iBlockStart += BLOCKCOUNT) {
182: indices[i] = 0xFFFF;
183: boolean touched = blockTouched(i);
184: if (!touched && iUntouched != 0xFFFF) {
185: // If no values in this block were set, we can just set its
186: // index to be the same as some other block with no values
187: // set, assuming we've seen one yet.
188: indices[i] = iUntouched;
189: } else {
190: int jBlockStart = 0;
191: int j = 0;
192: for (j = 0; j < limitCompacted; ++j, jBlockStart += BLOCKCOUNT) {
193: if (hashes[i] == hashes[j]
194: && arrayRegionMatches(values,
195: iBlockStart, values,
196: jBlockStart, BLOCKCOUNT)) {
197: indices[i] = (char) jBlockStart;
198: break;
199: }
200: }
201: if (indices[i] == 0xFFFF) {
202: // we didn't match, so copy & update
203: System.arraycopy(values, iBlockStart, values,
204: jBlockStart, BLOCKCOUNT);
205: indices[i] = (char) jBlockStart;
206: hashes[j] = hashes[i];
207: ++limitCompacted;
208:
209: if (!touched) {
210: // If this is the first untouched block we've seen,
211: // remember its index.
212: iUntouched = (char) jBlockStart;
213: }
214: }
215: }
216: }
217: // we are done compacting, so now make the array shorter
218: int newSize = limitCompacted * BLOCKCOUNT;
219: byte[] result = new byte[newSize];
220: System.arraycopy(values, 0, result, 0, newSize);
221: values = result;
222: isCompact = true;
223: hashes = null;
224: }
225: }
226:
227: /**
228: * Convenience utility to compare two arrays of doubles.
229: * @param len the length to compare.
230: * The start indices and start+len must be valid.
231: */
232: final static boolean arrayRegionMatches(byte[] source,
233: int sourceStart, byte[] target, int targetStart, int len) {
234: int sourceEnd = sourceStart + len;
235: int delta = targetStart - sourceStart;
236: for (int i = sourceStart; i < sourceEnd; i++) {
237: if (source[i] != target[i + delta])
238: return false;
239: }
240: return true;
241: }
242:
243: /**
244: * Remember that a specified block was "touched", i.e. had a value set.
245: * Untouched blocks can be skipped when compacting the array
246: */
247: private final void touchBlock(int i, int value) {
248: hashes[i] = (hashes[i] + (value << 1)) | 1;
249: }
250:
251: /**
252: * Query whether a specified block was "touched", i.e. had a value set.
253: * Untouched blocks can be skipped when compacting the array
254: */
255: private final boolean blockTouched(int i) {
256: return hashes[i] != 0;
257: }
258:
259: /**
260: * For internal use only. Do not modify the result, the behavior of
261: * modified results are undefined.
262: * @internal
263: * @deprecated This API is ICU internal only.
264: */
265: public char[] getIndexArray() {
266: return indices;
267: }
268:
269: /**
270: * For internal use only. Do not modify the result, the behavior of
271: * modified results are undefined.
272: * @internal
273: * @deprecated This API is ICU internal only.
274: */
275: public byte[] getValueArray() {
276: return values;
277: }
278:
279: /**
280: * Overrides Cloneable
281: * @internal
282: * @deprecated This API is ICU internal only.
283: */
284: public Object clone() {
285: try {
286: CompactByteArray other = (CompactByteArray) super .clone();
287: other.values = (byte[]) values.clone();
288: other.indices = (char[]) indices.clone();
289: if (hashes != null)
290: other.hashes = (int[]) hashes.clone();
291: return other;
292: } catch (CloneNotSupportedException e) {
293: throw new IllegalStateException();
294: }
295: }
296:
297: /**
298: * Compares the equality of two compact array objects.
299: * @param obj the compact array object to be compared with this.
300: * @return true if the current compact array object is the same
301: * as the compact array object obj; false otherwise.
302: * @internal
303: * @deprecated This API is ICU internal only.
304: */
305: public boolean equals(Object obj) {
306: if (obj == null)
307: return false;
308: if (this == obj) // quick check
309: return true;
310: if (getClass() != obj.getClass()) // same class?
311: return false;
312: CompactByteArray other = (CompactByteArray) obj;
313: for (int i = 0; i < UNICODECOUNT; i++) {
314: // could be sped up later
315: if (elementAt((char) i) != other.elementAt((char) i))
316: return false;
317: }
318: return true; // we made it through the guantlet.
319: }
320:
321: /**
322: * Generates the hash code for the compact array object
323: * @internal
324: * @deprecated This API is ICU internal only.
325: */
326: public int hashCode() {
327: int result = 0;
328: int increment = Math.min(3, values.length / 16);
329: for (int i = 0; i < values.length; i += increment) {
330: result = result * 37 + values[i];
331: }
332: return result;
333: }
334:
335: // --------------------------------------------------------------
336: // private
337: // --------------------------------------------------------------
338:
339: /**
340: * Expanding takes the array back to a 65536 element array.
341: */
342: private void expand() {
343: int i;
344: if (isCompact) {
345: byte[] tempArray;
346: hashes = new int[INDEXCOUNT];
347: tempArray = new byte[UNICODECOUNT];
348: for (i = 0; i < UNICODECOUNT; ++i) {
349: byte value = elementAt((char) i);
350: tempArray[i] = value;
351: touchBlock(i >> BLOCKSHIFT, value);
352: }
353: for (i = 0; i < INDEXCOUNT; ++i) {
354: indices[i] = (char) (i << BLOCKSHIFT);
355: }
356: values = null;
357: values = tempArray;
358: isCompact = false;
359: }
360: }
361:
362: private static final int BLOCKSHIFT = 7;
363: private static final int BLOCKCOUNT = (1 << BLOCKSHIFT);
364: private static final int INDEXSHIFT = (16 - BLOCKSHIFT);
365: private static final int INDEXCOUNT = (1 << INDEXSHIFT);
366: private static final int BLOCKMASK = BLOCKCOUNT - 1;
367:
368: private byte[] values;
369: private char indices[];
370: private int[] hashes;
371: private boolean isCompact;
372: byte defaultValue;
373: };
|