001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007:
008: package com.ibm.icu.util;
009:
010: import com.ibm.icu.impl.Utility;
011:
012: /**
013: * class CompactATypeArray : use only on primitive data types
014: * Provides a compact way to store information that is indexed by Unicode
015: * values, such as character properties, types, keyboard values, etc.This
016: * is very useful when you have a block of Unicode data that contains
017: * significant values while the rest of the Unicode data is unused in the
018: * application or when you have a lot of redundance, such as where all 21,000
019: * Han ideographs have the same value. However, lookup is much faster than a
020: * hash table.
021: * A compact array of any primitive data type serves two purposes:
022: * <UL type = round>
023: * <LI>Fast access of the indexed values.
024: * <LI>Smaller memory footprint.
025: * </UL>
026: * A compact array is composed of a index array and value array. The index
027: * array contains the indicies of Unicode characters to the value array.
028: * @see CompactByteArray
029: * @author Helena Shih
030: * @internal
031: * @deprecated This API is ICU internal only.
032: */
033: public final class CompactCharArray implements Cloneable {
034:
035: /**
036: * The total number of Unicode characters.
037: * @internal
038: * @deprecated This API is ICU internal only.
039: */
040: public static final int UNICODECOUNT = 65536;
041:
042: /**
043: * Default constructor for CompactCharArray, the default value of the
044: * compact array is 0.
045: * @internal
046: * @deprecated This API is ICU internal only.
047: */
048: public CompactCharArray() {
049: this ((char) 0);
050: }
051:
052: /**
053: * Constructor for CompactCharArray.
054: * @param defaultValue the default value of the compact array.
055: * @internal
056: * @deprecated This API is ICU internal only.
057: */
058: public CompactCharArray(char defaultValue) {
059: int i;
060: values = new char[UNICODECOUNT];
061: indices = new char[INDEXCOUNT];
062: hashes = new int[INDEXCOUNT];
063: for (i = 0; i < UNICODECOUNT; ++i) {
064: values[i] = defaultValue;
065: }
066: for (i = 0; i < INDEXCOUNT; ++i) {
067: indices[i] = (char) (i << BLOCKSHIFT);
068: hashes[i] = 0;
069: }
070: isCompact = false;
071:
072: this .defaultValue = defaultValue;
073: }
074:
075: /**
076: * Constructor for CompactCharArray.
077: * @param indexArray the indicies of the compact array.
078: * @param newValues the values of the compact array.
079: * @exception IllegalArgumentException If the index is out of range.
080: * @internal
081: * @deprecated This API is ICU internal only.
082: */
083: public CompactCharArray(char indexArray[], char newValues[]) {
084: int i;
085: if (indexArray.length != INDEXCOUNT)
086: throw new IllegalArgumentException("Index out of bounds.");
087: for (i = 0; i < INDEXCOUNT; ++i) {
088: char index = indexArray[i];
089: if ((index < 0) || (index >= newValues.length + BLOCKCOUNT))
090: throw new IllegalArgumentException(
091: "Index out of bounds.");
092: }
093: indices = indexArray;
094: values = newValues;
095: isCompact = true;
096: }
097:
098: /**
099: * Constructor for CompactCharArray.
100: *
101: * @param indexArray the RLE-encoded indicies of the compact array.
102: * @param valueArray the RLE-encoded values of the compact array.
103: *
104: * @throws IllegalArgumentException if the index or value array is
105: * the wrong size.
106: * @internal
107: * @deprecated This API is ICU internal only.
108: */
109: public CompactCharArray(String indexArray, String valueArray) {
110: this (Utility.RLEStringToCharArray(indexArray), Utility
111: .RLEStringToCharArray(valueArray));
112: }
113:
114: /**
115: * Get the mapped value of a Unicode character.
116: * @param index the character to get the mapped value with
117: * @return the mapped value of the given character
118: * @internal
119: * @deprecated This API is ICU internal only.
120: */
121: public char elementAt(char index) {
122: int ix = (indices[index >> BLOCKSHIFT] & 0xFFFF)
123: + (index & BLOCKMASK);
124: return ix >= values.length ? defaultValue : values[ix];
125: }
126:
127: /**
128: * Set a new value for a Unicode character.
129: * Set automatically expands the array if it is compacted.
130: * @param index the character to set the mapped value with
131: * @param value the new mapped value
132: * @internal
133: * @deprecated This API is ICU internal only.
134: */
135: public void setElementAt(char index, char value) {
136: if (isCompact)
137: expand();
138: values[(int) index] = value;
139: touchBlock(index >> BLOCKSHIFT, value);
140: }
141:
142: /**
143: * Set new values for a range of Unicode character.
144: *
145: * @param start the starting offset of the range
146: * @param end the ending offset of the range
147: * @param value the new mapped value
148: * @internal
149: * @deprecated This API is ICU internal only.
150: */
151: public void setElementAt(char start, char end, char value) {
152: int i;
153: if (isCompact) {
154: expand();
155: }
156: for (i = start; i <= end; ++i) {
157: values[i] = value;
158: touchBlock(i >> BLOCKSHIFT, value);
159: }
160: }
161:
162: /**
163: * Compact the array
164: * @internal
165: * @deprecated This API is ICU internal only.
166: */
167: public void compact() {
168: compact(true);
169: }
170:
171: /**
172: * Compact the array.
173: * @internal
174: * @deprecated This API is ICU internal only.
175: */
176: public void compact(boolean exhaustive) {
177: if (!isCompact) {
178: int iBlockStart = 0;
179: char iUntouched = 0xFFFF;
180: int newSize = 0;
181:
182: char[] target = exhaustive ? new char[UNICODECOUNT]
183: : values;
184:
185: for (int i = 0; i < indices.length; ++i, iBlockStart += BLOCKCOUNT) {
186: indices[i] = 0xFFFF;
187: boolean touched = blockTouched(i);
188: if (!touched && iUntouched != 0xFFFF) {
189: // If no values in this block were set, we can just set its
190: // index to be the same as some other block with no values
191: // set, assuming we've seen one yet.
192: indices[i] = iUntouched;
193: } else {
194: int jBlockStart = 0;
195: // See if we can find a previously compacted block that's identical
196: for (int j = 0; j < i; ++j, jBlockStart += BLOCKCOUNT) {
197: if (hashes[i] == hashes[j]
198: && arrayRegionMatches(values,
199: iBlockStart, values,
200: jBlockStart, BLOCKCOUNT)) {
201: indices[i] = indices[j];
202: }
203: }
204: if (indices[i] == 0xFFFF) {
205: int dest; // Where to copy
206: if (exhaustive) {
207: // See if we can find some overlap with another block
208: dest = FindOverlappingPosition(iBlockStart,
209: target, newSize);
210: } else {
211: // Just copy to the end; it's quicker
212: dest = newSize;
213: }
214: int limit = dest + BLOCKCOUNT;
215: if (limit > newSize) {
216: for (int j = newSize; j < limit; ++j) {
217: target[j] = values[iBlockStart + j
218: - dest];
219: }
220: newSize = limit;
221: }
222: indices[i] = (char) dest;
223: if (!touched) {
224: // If this is the first untouched block we've seen,
225: // remember its index.
226: iUntouched = (char) jBlockStart;
227: }
228: }
229: }
230: }
231: // we are done compacting, so now make the array shorter
232: char[] result = new char[newSize];
233: System.arraycopy(target, 0, result, 0, newSize);
234: values = result;
235: isCompact = true;
236: hashes = null;
237: }
238: }
239:
240: private int FindOverlappingPosition(int start, char[] tempValues,
241: int tempCount) {
242: for (int i = 0; i < tempCount; i += 1) {
243: int currentCount = BLOCKCOUNT;
244: if (i + BLOCKCOUNT > tempCount) {
245: currentCount = tempCount - i;
246: }
247: if (arrayRegionMatches(values, start, tempValues, i,
248: currentCount))
249: return i;
250: }
251: return tempCount;
252: }
253:
254: /**
255: * Convenience utility to compare two arrays of doubles.
256: * @param len the length to compare.
257: * The start indices and start+len must be valid.
258: */
259: final static boolean arrayRegionMatches(char[] source,
260: int sourceStart, char[] target, int targetStart, int len) {
261: int sourceEnd = sourceStart + len;
262: int delta = targetStart - sourceStart;
263: for (int i = sourceStart; i < sourceEnd; i++) {
264: if (source[i] != target[i + delta])
265: return false;
266: }
267: return true;
268: }
269:
270: /**
271: * Remember that a specified block was "touched", i.e. had a value set.
272: * Untouched blocks can be skipped when compacting the array
273: */
274: private final void touchBlock(int i, int value) {
275: hashes[i] = (hashes[i] + (value << 1)) | 1;
276: }
277:
278: /**
279: * Query whether a specified block was "touched", i.e. had a value set.
280: * Untouched blocks can be skipped when compacting the array
281: */
282: private final boolean blockTouched(int i) {
283: return hashes[i] != 0;
284: }
285:
286: /**
287: * For internal use only. Do not modify the result, the behavior of
288: * modified results are undefined.
289: * @internal
290: * @deprecated This API is ICU internal only.
291: */
292: public char[] getIndexArray() {
293: return indices;
294: }
295:
296: /**
297: * For internal use only. Do not modify the result, the behavior of
298: * modified results are undefined.
299: * @internal
300: * @deprecated This API is ICU internal only.
301: */
302: public char[] getValueArray() {
303: return values;
304: }
305:
306: /**
307: * Overrides Cloneable
308: * @internal
309: * @deprecated This API is ICU internal only.
310: */
311: public Object clone() {
312: try {
313: CompactCharArray other = (CompactCharArray) super .clone();
314: other.values = (char[]) values.clone();
315: other.indices = (char[]) indices.clone();
316: if (hashes != null)
317: other.hashes = (int[]) hashes.clone();
318: return other;
319: } catch (CloneNotSupportedException e) {
320: throw new IllegalStateException();
321: }
322: }
323:
324: /**
325: * Compares the equality of two compact array objects.
326: * @param obj the compact array object to be compared with this.
327: * @return true if the current compact array object is the same
328: * as the compact array object obj; false otherwise.
329: * @internal
330: * @deprecated This API is ICU internal only.
331: */
332: public boolean equals(Object obj) {
333: if (obj == null)
334: return false;
335: if (this == obj) // quick check
336: return true;
337: if (getClass() != obj.getClass()) // same class?
338: return false;
339: CompactCharArray other = (CompactCharArray) obj;
340: for (int i = 0; i < UNICODECOUNT; i++) {
341: // could be sped up later
342: if (elementAt((char) i) != other.elementAt((char) i))
343: return false;
344: }
345: return true; // we made it through the guantlet.
346: }
347:
348: /**
349: * Generates the hash code for the compact array object
350: * @internal
351: * @deprecated This API is ICU internal only.
352: */
353: public int hashCode() {
354: int result = 0;
355: int increment = Math.min(3, values.length / 16);
356: for (int i = 0; i < values.length; i += increment) {
357: result = result * 37 + values[i];
358: }
359: return result;
360: }
361:
362: // --------------------------------------------------------------
363: // private
364: // --------------------------------------------------------------
365:
366: /**
367: * Expanding takes the array back to a 65536 element array.
368: */
369: private void expand() {
370: int i;
371: if (isCompact) {
372: char[] tempArray;
373: hashes = new int[INDEXCOUNT];
374: tempArray = new char[UNICODECOUNT];
375: for (i = 0; i < UNICODECOUNT; ++i) {
376: tempArray[i] = elementAt((char) i);
377: }
378: for (i = 0; i < INDEXCOUNT; ++i) {
379: indices[i] = (char) (i << BLOCKSHIFT);
380: }
381: values = null;
382: values = tempArray;
383: isCompact = false;
384: }
385: }
386:
387: /**
388: * @internal
389: * @deprecated This API is ICU internal only.
390: */
391: public static final int BLOCKSHIFT = 5; // NormalizerBuilder needs - liu
392: static final int BLOCKCOUNT = (1 << BLOCKSHIFT);
393: static final int INDEXSHIFT = (16 - BLOCKSHIFT);
394: static final int INDEXCOUNT = (1 << INDEXSHIFT);
395: static final int BLOCKMASK = BLOCKCOUNT - 1;
396:
397: private char values[];
398: private char indices[];
399: private int[] hashes;
400: private boolean isCompact;
401: char defaultValue;
402: };
|