001: /*
002: *
003: * @(#)CompactByteArray.java 1.22 06/10/10
004: *
005: * Portions Copyright 2000-2006 Sun Microsystems, Inc. All Rights
006: * Reserved. Use is subject to license terms.
007: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU General Public License version
011: * 2 only, as published by the Free Software Foundation.
012: *
013: * This program is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * General Public License version 2 for more details (a copy is
017: * included at /legal/license.txt).
018: *
019: * You should have received a copy of the GNU General Public License
020: * version 2 along with this work; if not, write to the Free Software
021: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022: * 02110-1301 USA
023: *
024: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
025: * Clara, CA 95054 or visit www.sun.com if you need additional
026: * information or have any questions.
027: */
028:
029: /*
030: * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
031: * (C) Copyright IBM Corp. 1996 - All Rights Reserved
032: *
033: * The original version of this source code and documentation is copyrighted
034: * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
035: * materials are provided under terms of a License Agreement between Taligent
036: * and Sun. This technology is protected by multiple US and International
037: * patents. This notice and attribution to Taligent may not be removed.
038: * Taligent is a registered trademark of Taligent, Inc.
039: *
040: */
041:
042: package sun.text;
043:
044: /**
045: * class CompactATypeArray : use only on primitive data types
046: * Provides a compact way to store information that is indexed by Unicode
047: * values, such as character properties, types, keyboard values, etc.This
048: * is very useful when you have a block of Unicode data that contains
049: * significant values while the rest of the Unicode data is unused in the
050: * application or when you have a lot of redundance, such as where all 21,000
051: * Han ideographs have the same value. However, lookup is much faster than a
052: * hash table.
053: * A compact array of any primitive data type serves two purposes:
054: * <UL type = round>
055: * <LI>Fast access of the indexed values.
056: * <LI>Smaller memory footprint.
057: * </UL>
058: * A compact array is composed of a index array and value array. The index
059: * array contains the indicies of Unicode characters to the value array.
060: *
061: * @see CompactIntArray
062: * @see CompactShortArray
063: * @version 1.22 10/10/06
064: * @author Helena Shih
065: */
066: public final class CompactByteArray implements Cloneable {
067:
068: /**
069: * The total number of Unicode characters.
070: */
071: public static final int UNICODECOUNT = 65536;
072:
073: /**
074: * Default constructor for CompactByteArray, the default value of the
075: * compact array is 0.
076: */
077: public CompactByteArray() {
078: this ((byte) 0);
079: }
080:
081: /**
082: * Constructor for CompactByteArray.
083: * @param defaultValue the default value of the compact array.
084: */
085: public CompactByteArray(byte defaultValue) {
086: int i;
087: values = new byte[UNICODECOUNT];
088: indices = new short[INDEXCOUNT];
089: hashes = new int[INDEXCOUNT];
090: for (i = 0; i < UNICODECOUNT; ++i) {
091: values[i] = defaultValue;
092: }
093: for (i = 0; i < INDEXCOUNT; ++i) {
094: indices[i] = (short) (i << BLOCKSHIFT);
095: hashes[i] = 0;
096: }
097: isCompact = false;
098: }
099:
100: /**
101: * Constructor for CompactByteArray.
102: * @param indexArray the indicies of the compact array.
103: * @param newValues the values of the compact array.
104: * @exception IllegalArgumentException If index is out of range.
105: */
106: public CompactByteArray(short indexArray[], byte newValues[]) {
107: int i;
108: if (indexArray.length != INDEXCOUNT)
109: throw new IllegalArgumentException("Index out of bounds!");
110: for (i = 0; i < INDEXCOUNT; ++i) {
111: short index = indexArray[i];
112: if ((index < 0) || (index >= newValues.length + BLOCKCOUNT))
113: throw new IllegalArgumentException(
114: "Index out of bounds!");
115: }
116: indices = indexArray;
117: values = newValues;
118: isCompact = true;
119: }
120:
121: /**
122: * Constructor for CompactByteArray.
123: * @param indexArray the RLE-encoded indicies of the compact array.
124: * @param valueArray the RLE-encoded values of the compact array.
125: * @exception IllegalArgumentException If index is out of range.
126: */
127: public CompactByteArray(String indexArray, String valueArray) {
128: this (Utility.RLEStringToShortArray(indexArray), Utility
129: .RLEStringToByteArray(valueArray));
130: }
131:
132: /**
133: * Get the mapped value of a Unicode character.
134: * @param index the character to get the mapped value with
135: * @return the mapped value of the given character
136: */
137: public byte elementAt(char index) {
138: return (values[(indices[index >> BLOCKSHIFT] & 0xFFFF)
139: + (index & BLOCKMASK)]);
140: }
141:
142: /**
143: * Set a new value for a Unicode character.
144: * Set automatically expands the array if it is compacted.
145: * @param index the character to set the mapped value with
146: * @param value the new mapped value
147: */
148: public void setElementAt(char index, byte value) {
149: if (isCompact)
150: expand();
151: values[(int) index] = value;
152: touchBlock(index >> BLOCKSHIFT, value);
153: }
154:
155: /**
156: * Set new values for a range of Unicode character.
157: * @param start the starting offset o of the range
158: * @param end the ending offset of the range
159: * @param value the new mapped value
160: */
161: public void setElementAt(char start, char end, byte value) {
162: int i;
163: if (isCompact) {
164: expand();
165: }
166: for (i = start; i <= end; ++i) {
167: values[i] = value;
168: touchBlock(i >> BLOCKSHIFT, value);
169: }
170: }
171:
172: /**
173: *Compact the array.
174: */
175: public void compact() {
176: if (!isCompact) {
177: int limitCompacted = 0;
178: int iBlockStart = 0;
179: short iUntouched = -1;
180:
181: for (int i = 0; i < indices.length; ++i, iBlockStart += BLOCKCOUNT) {
182: indices[i] = -1;
183: boolean touched = blockTouched(i);
184: if (!touched && iUntouched != -1) {
185: // If no values in this block were set, we can just set its
186: // index to be the same as some other block with no values
187: // set, assuming we've seen one yet.
188: indices[i] = iUntouched;
189: } else {
190: int jBlockStart = 0;
191: int j = 0;
192: for (j = 0; j < limitCompacted; ++j, jBlockStart += BLOCKCOUNT) {
193: if (hashes[i] == hashes[j]
194: && arrayRegionMatches(values,
195: iBlockStart, values,
196: jBlockStart, BLOCKCOUNT)) {
197: indices[i] = (short) jBlockStart;
198: break;
199: }
200: }
201: if (indices[i] == -1) {
202: // we didn't match, so copy & update
203: System.arraycopy(values, iBlockStart, values,
204: jBlockStart, BLOCKCOUNT);
205: indices[i] = (short) jBlockStart;
206: hashes[j] = hashes[i];
207: ++limitCompacted;
208:
209: if (!touched) {
210: // If this is the first untouched block we've seen,
211: // remember its index.
212: iUntouched = (short) jBlockStart;
213: }
214: }
215: }
216: }
217: // we are done compacting, so now make the array shorter
218: int newSize = limitCompacted * BLOCKCOUNT;
219: byte[] result = new byte[newSize];
220: System.arraycopy(values, 0, result, 0, newSize);
221: values = result;
222: isCompact = true;
223: hashes = null;
224: }
225: }
226:
227: /**
228: * Convenience utility to compare two arrays of doubles.
229: * @param len the length to compare.
230: * The start indices and start+len must be valid.
231: */
232: final static boolean arrayRegionMatches(byte[] source,
233: int sourceStart, byte[] target, int targetStart, int len) {
234: int sourceEnd = sourceStart + len;
235: int delta = targetStart - sourceStart;
236: for (int i = sourceStart; i < sourceEnd; i++) {
237: if (source[i] != target[i + delta])
238: return false;
239: }
240: return true;
241: }
242:
243: /**
244: * Remember that a specified block was "touched", i.e. had a value set.
245: * Untouched blocks can be skipped when compacting the array
246: */
247: private final void touchBlock(int i, int value) {
248: hashes[i] = (hashes[i] + (value << 1)) | 1;
249: }
250:
251: /**
252: * Query whether a specified block was "touched", i.e. had a value set.
253: * Untouched blocks can be skipped when compacting the array
254: */
255: private final boolean blockTouched(int i) {
256: return hashes[i] != 0;
257: }
258:
259: /** For internal use only. Do not modify the result, the behavior of
260: * modified results are undefined.
261: */
262: public short getIndexArray()[] {
263: return indices;
264: }
265:
266: /** For internal use only. Do not modify the result, the behavior of
267: * modified results are undefined.
268: */
269: public byte getStringArray()[] {
270: return values;
271: }
272:
273: /**
274: * Overrides Cloneable
275: */
276: public Object clone() {
277: try {
278: CompactByteArray other = (CompactByteArray) super .clone();
279: other.values = (byte[]) values.clone();
280: other.indices = (short[]) indices.clone();
281: if (hashes != null)
282: other.hashes = (int[]) hashes.clone();
283: return other;
284: } catch (CloneNotSupportedException e) {
285: throw new InternalError();
286: }
287: }
288:
289: /**
290: * Compares the equality of two compact array objects.
291: * @param obj the compact array object to be compared with this.
292: * @return true if the current compact array object is the same
293: * as the compact array object obj; false otherwise.
294: */
295: public boolean equals(Object obj) {
296: if (obj == null)
297: return false;
298: if (this == obj) // quick check
299: return true;
300: if (getClass() != obj.getClass()) // same class?
301: return false;
302: CompactByteArray other = (CompactByteArray) obj;
303: for (int i = 0; i < UNICODECOUNT; i++) {
304: // could be sped up later
305: if (elementAt((char) i) != other.elementAt((char) i))
306: return false;
307: }
308: return true; // we made it through the guantlet.
309: }
310:
311: /**
312: * Generates the hash code for the compact array object
313: */
314:
315: public int hashCode() {
316: int result = 0;
317: int increment = Math.min(3, values.length / 16);
318: for (int i = 0; i < values.length; i += increment) {
319: result = result * 37 + values[i];
320: }
321: return result;
322: }
323:
324: // --------------------------------------------------------------
325: // package private
326: // --------------------------------------------------------------
327: /**
328: * Expanding takes the array back to a 65536 element array.
329: */
330: private void expand() {
331: int i;
332: if (isCompact) {
333: byte[] tempArray;
334: hashes = new int[INDEXCOUNT];
335: tempArray = new byte[UNICODECOUNT];
336: for (i = 0; i < UNICODECOUNT; ++i) {
337: byte value = elementAt((char) i);
338: tempArray[i] = value;
339: touchBlock(i >> BLOCKSHIFT, value);
340: }
341: for (i = 0; i < INDEXCOUNT; ++i) {
342: indices[i] = (short) (i << BLOCKSHIFT);
343: }
344: values = null;
345: values = tempArray;
346: isCompact = false;
347: }
348: }
349:
350: private byte[] getArray() {
351: return values;
352: }
353:
354: private static final int BLOCKSHIFT = 7;
355: private static final int BLOCKCOUNT = (1 << BLOCKSHIFT);
356: private static final int INDEXSHIFT = (16 - BLOCKSHIFT);
357: private static final int INDEXCOUNT = (1 << INDEXSHIFT);
358: private static final int BLOCKMASK = BLOCKCOUNT - 1;
359:
360: private byte[] values; // char -> short (char parameterized short)
361: private short indices[];
362: private boolean isCompact;
363: private int[] hashes;
364: };
|