001: /*
002: *
003: * @(#)CompactCharArray.java 1.22 06/10/10
004: *
005: * Portions Copyright 2000-2006 Sun Microsystems, Inc. All Rights
006: * Reserved. Use is subject to license terms.
007: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU General Public License version
011: * 2 only, as published by the Free Software Foundation.
012: *
013: * This program is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * General Public License version 2 for more details (a copy is
017: * included at /legal/license.txt).
018: *
019: * You should have received a copy of the GNU General Public License
020: * version 2 along with this work; if not, write to the Free Software
021: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022: * 02110-1301 USA
023: *
024: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
025: * Clara, CA 95054 or visit www.sun.com if you need additional
026: * information or have any questions.
027: */
028:
029: /*
030: * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
031: * (C) Copyright IBM Corp. 1996 - All Rights Reserved
032: *
033: * The original version of this source code and documentation is copyrighted
034: * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
035: * materials are provided under terms of a License Agreement between Taligent
036: * and Sun. This technology is protected by multiple US and International
037: * patents. This notice and attribution to Taligent may not be removed.
038: * Taligent is a registered trademark of Taligent, Inc.
039: *
040: */
041:
042: package sun.text;
043:
044: /**
045: * class CompactATypeArray : use only on primitive data types
046: * Provides a compact way to store information that is indexed by Unicode
047: * values, such as character properties, types, keyboard values, etc.This
048: * is very useful when you have a block of Unicode data that contains
049: * significant values while the rest of the Unicode data is unused in the
050: * application or when you have a lot of redundance, such as where all 21,000
051: * Han ideographs have the same value. However, lookup is much faster than a
052: * hash table.
053: * A compact array of any primitive data type serves two purposes:
054: * <UL type = round>
055: * <LI>Fast access of the indexed values.
056: * <LI>Smaller memory footprint.
057: * </UL>
058: * A compact array is composed of a index array and value array. The index
059: * array contains the indicies of Unicode characters to the value array.
060: *
061: * @see CompactByteArray
062: * @see CompactIntArray
063: * @see CompactShortArray
064: * @version 1.16 11/28/00
065: * @author Helena Shih
066: */
067: public final class CompactCharArray implements Cloneable {
068:
069: /**
070: * The total number of Unicode characters.
071: */
072: public static final int UNICODECOUNT = 65536;
073:
074: /**
075: * Default constructor for CompactCharArray, the default value of the
076: * compact array is '\u0000'.
077: */
078: public CompactCharArray() {
079: this ('\u0000');
080: }
081:
082: /**
083: * Contructor for CompactCharArray.
084: * @param defaultValue the default value of the compact array.
085: */
086: public CompactCharArray(char defaultValue) {
087: int i;
088: values = new char[UNICODECOUNT];
089: indices = new char[INDEXCOUNT];
090: hashes = new int[INDEXCOUNT];
091: for (i = 0; i < UNICODECOUNT; ++i) {
092: values[i] = defaultValue;
093: }
094: for (i = 0; i < INDEXCOUNT; ++i) {
095: indices[i] = (char) (i << BLOCKSHIFT);
096: hashes[i] = 0;
097: }
098: isCompact = false;
099: this .defaultValue = defaultValue;
100: }
101:
102: /**
103: * Constructor for CompactCharArray.
104: *
105: * @param indexArray the RLE-encoded indicies of the compact array.
106: * @param valueArray the RLE-encoded values of the compact array.
107: *
108: * @throws IllegalArgumentException if the index or value array is
109: * the wrong size.
110: */
111: public CompactCharArray(String indexArray, String valueArray) {
112: this (Utility.RLEStringToCharArray(indexArray), Utility
113: .RLEStringToCharArray(valueArray));
114: }
115:
116: /**
117: * Constructor for CompactCharArray.
118: * @param indexArray the indicies of the compact array.
119: * @param newValues the values of the compact array.
120: * @exception IllegalArgumentException If the index is out of range.
121: */
122: public CompactCharArray(char indexArray[], char newValues[]) {
123: int i;
124: if (indexArray.length != INDEXCOUNT)
125:
126: throw new IllegalArgumentException("Index out of bounds.");
127: for (i = 0; i < INDEXCOUNT; ++i) {
128: char index = indexArray[i];
129: if ((index < 0) || (index >= newValues.length + BLOCKCOUNT))
130: throw new IllegalArgumentException(
131: "Index out of bounds.");
132: }
133: indices = indexArray;
134: values = newValues;
135: isCompact = true;
136: }
137:
138: /**
139: * Constructor for CompactCharArray.
140: * @param indexArray the indicies of the compact array.
141: * @param newValues the values of the compact array.
142: * @exception IllegalArgumentException If the index is out of range.
143: */
144: /*
145: public CompactCharArray(short indexArray[], char newValues[])
146: {
147: int i;
148: if (indexArray.length != INDEXCOUNT)
149: throw new IllegalArgumentException("Index out of bounds.");
150: for (i = 0; i < INDEXCOUNT; ++i) {
151: short index = indexArray[i];
152: if ((index < 0) || (index >= newValues.length+BLOCKCOUNT))
153: throw new IllegalArgumentException("Index out of bounds.");
154: }
155: indices = indexArray;
156: values = newValues;
157: isCompact = true;
158: }
159: */
160: /**
161: * Get the mapped value of a Unicode character.
162: * @param index the character to get the mapped value with
163: * @return the mapped value of the given character
164: */
165: public char elementAt(char index) // parameterized on short
166: {
167: return (values[(indices[index >> BLOCKSHIFT] & 0xFFFF)
168: + (index & BLOCKMASK)]);
169: }
170:
171: /**
172: * Set a new value for a Unicode character.
173: * Set automatically expands the array if it is compacted.
174: * @param index the character to set the mapped value with
175: * @param value the new mapped value
176: */
177: public void setElementAt(char index, char value) {
178: if (isCompact)
179: expand();
180: values[(int) index] = value;
181: touchBlock(index >> BLOCKSHIFT, value);
182: }
183:
184: /**
185: * Set new values for a range of Unicode character.
186: * @param start the starting offset of the range
187: * @param end the endding offset of the range
188: * @param value the new mapped value
189: */
190: public void setElementAt(char start, char end, char value) {
191: int i;
192: if (isCompact) {
193: expand();
194: }
195: for (i = start; i <= end; ++i) {
196: values[i] = value;
197: touchBlock(i >> BLOCKSHIFT, value);
198: }
199: }
200:
201: /**
202: *Compact the array.
203: */
204: public void compact() {
205: if (!isCompact) {
206: int limitCompacted = 0;
207: int iBlockStart = 0;
208: char iUntouched = 0xFFFF;
209:
210: for (int i = 0; i < indices.length; ++i, iBlockStart += BLOCKCOUNT) {
211: indices[i] = 0xFFFF;
212: boolean touched = blockTouched(i);
213: if (!touched && iUntouched != 0xFFFF) {
214: // If no values in this block were set, we can just set its
215: // index to be the same as some other block with no values
216: // set, assuming we've seen one yet.
217: indices[i] = iUntouched;
218: } else {
219: int jBlockStart = 0;
220: int j = 0;
221: for (j = 0; j < limitCompacted; ++j, jBlockStart += BLOCKCOUNT) {
222: if (hashes[i] == hashes[j]
223: && arrayRegionMatches(values,
224: iBlockStart, values,
225: jBlockStart, BLOCKCOUNT)) {
226: indices[i] = (char) jBlockStart;
227: }
228: }
229: if (indices[i] == 0xFFFF) {
230: // we didn't match, so copy & update
231: System.arraycopy(values, iBlockStart, values,
232: jBlockStart, BLOCKCOUNT);
233: indices[i] = (char) jBlockStart;
234: hashes[j] = hashes[i];
235: ++limitCompacted;
236:
237: if (!touched) {
238: // If this is the first untouched block we've seen,
239: // remember its index.
240: iUntouched = (char) jBlockStart;
241: }
242: }
243: }
244: }
245: // we are done compacting, so now make the array shorter
246: int newSize = limitCompacted * BLOCKCOUNT;
247: char[] result = new char[newSize];
248: System.arraycopy(values, 0, result, 0, newSize);
249: values = result;
250: isCompact = true;
251: hashes = null;
252: }
253: }
254:
255: /**
256: * Convenience utility to compare two arrays of doubles.
257: * @param len the length to compare.
258: * The start indices and start+len must be valid.
259: */
260: final static boolean arrayRegionMatches(char[] source,
261: int sourceStart, char[] target, int targetStart, int len) {
262: int sourceEnd = sourceStart + len;
263: int delta = targetStart - sourceStart;
264: for (int i = sourceStart; i < sourceEnd; i++) {
265: if (source[i] != target[i + delta])
266: return false;
267: }
268: return true;
269: }
270:
271: /**
272: * Remember that a specified block was "touched", i.e. had a value set.
273: * Untouched blocks can be skipped when compacting the array
274: */
275: private final void touchBlock(int i, int value) {
276: hashes[i] = (hashes[i] + (value << 1)) | 1;
277: }
278:
279: /**
280: * Query whether a specified block was "touched", i.e. had a value set.
281: * Untouched blocks can be skipped when compacting the array
282: */
283: private final boolean blockTouched(int i) {
284: return hashes[i] != 0;
285: }
286:
287: /** For internal use only. Do not modify the result, the behavior of
288: * modified results are undefined.
289: */
290: public char[] getIndexArray() {
291: return indices;
292: }
293:
294: /** For internal use only. Do not modify the result, the behavior of
295: * modified results are undefined.
296: */
297: public char[] getStringArray() {
298: return values;
299: }
300:
301: /**
302: * Overrides Cloneable
303: */
304: public Object clone() {
305: try {
306: CompactCharArray other = (CompactCharArray) super .clone();
307: other.values = (char[]) values.clone();
308: other.indices = (char[]) indices.clone();
309: if (hashes != null)
310: other.hashes = (int[]) hashes.clone();
311: return other;
312: } catch (CloneNotSupportedException e) {
313: throw new InternalError();
314: }
315: }
316:
317: /**
318: * Compares the equality of two compact array objects.
319: * @param obj the compact array object to be compared with this.
320: * @return true if the current compact array object is the same
321: * as the compact array object obj; false otherwise.
322: */
323: public boolean equals(Object obj) {
324: if (obj == null)
325: return false;
326: if (this == obj) // quick check
327: return true;
328: if (getClass() != obj.getClass()) // same class?
329: return false;
330: CompactCharArray other = (CompactCharArray) obj;
331: for (int i = 0; i < UNICODECOUNT; i++) {
332: // could be sped up later
333: if (elementAt((char) i) != other.elementAt((char) i))
334: return false;
335: }
336: return true; // we made it through the guantlet.
337: }
338:
339: /**
340: * Generates the hash code for the compact array object
341: */
342:
343: public int hashCode() {
344: int result = 0;
345: int increment = Math.min(3, values.length / 16);
346: for (int i = 0; i < values.length; i += increment) {
347: result = result * 37 + values[i];
348: }
349: return result;
350: }
351:
352: // --------------------------------------------------------------
353: // private
354: // --------------------------------------------------------------
355: /**
356: * Expanding takes the array back to a 65536 element array.
357: */
358: private void expand() {
359: int i;
360: if (isCompact) {
361: char[] tempArray;
362: tempArray = new char[UNICODECOUNT];
363: hashes = new int[INDEXCOUNT];
364: for (i = 0; i < UNICODECOUNT; ++i) {
365: char value = elementAt((char) i);
366: tempArray[i] = value;
367: touchBlock(i >> BLOCKSHIFT, value);
368: }
369: for (i = 0; i < INDEXCOUNT; ++i) {
370: indices[i] = (char) (i << BLOCKSHIFT);
371: }
372: values = null;
373: values = tempArray;
374: isCompact = false;
375: }
376: }
377:
378: private char getArrayValue(int n) {
379: return values[n];
380: }
381:
382: private char getIndexArrayValue(int n) {
383: return indices[n];
384: }
385:
386: private static final int BLOCKSHIFT = 5;
387: private static final int BLOCKCOUNT = (1 << BLOCKSHIFT);
388: private static final int INDEXSHIFT = (16 - BLOCKSHIFT);
389: private static final int INDEXCOUNT = (1 << INDEXSHIFT);
390: private static final int BLOCKMASK = BLOCKCOUNT - 1;
391:
392: private char[] values; // char -> short (char parameterized short)
393: private char indices[];
394: private int[] hashes;
395: private boolean isCompact;
396: private char defaultValue;
397: };
|