001: /*
002: * Copyright 1996-2005 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: /*
027: * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
028: * (C) Copyright IBM Corp. 1996 - All Rights Reserved
029: *
030: * The original version of this source code and documentation is copyrighted
031: * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
032: * materials are provided under terms of a License Agreement between Taligent
033: * and Sun. This technology is protected by multiple US and International
034: * patents. This notice and attribution to Taligent may not be removed.
035: * Taligent is a registered trademark of Taligent, Inc.
036: *
037: */
038:
039: package sun.text;
040:
041: /**
042: * class CompactATypeArray : use only on primitive data types
043: * Provides a compact way to store information that is indexed by Unicode
044: * values, such as character properties, types, keyboard values, etc.This
045: * is very useful when you have a block of Unicode data that contains
046: * significant values while the rest of the Unicode data is unused in the
047: * application or when you have a lot of redundance, such as where all 21,000
048: * Han ideographs have the same value. However, lookup is much faster than a
049: * hash table.
050: * A compact array of any primitive data type serves two purposes:
051: * <UL type = round>
052: * <LI>Fast access of the indexed values.
053: * <LI>Smaller memory footprint.
054: * </UL>
055: * A compact array is composed of a index array and value array. The index
056: * array contains the indicies of Unicode characters to the value array.
057: *
058: * @see CompactIntArray
059: * @see CompactShortArray
060: * @version 1.29 05/05/07
061: * @author Helena Shih
062: */
063: public final class CompactByteArray implements Cloneable {
064:
065: /**
066: * The total number of Unicode characters.
067: */
068: public static final int UNICODECOUNT = 65536;
069:
070: /**
071: * Constructor for CompactByteArray.
072: * @param defaultValue the default value of the compact array.
073: */
074: public CompactByteArray(byte defaultValue) {
075: int i;
076: values = new byte[UNICODECOUNT];
077: indices = new short[INDEXCOUNT];
078: hashes = new int[INDEXCOUNT];
079: for (i = 0; i < UNICODECOUNT; ++i) {
080: values[i] = defaultValue;
081: }
082: for (i = 0; i < INDEXCOUNT; ++i) {
083: indices[i] = (short) (i << BLOCKSHIFT);
084: hashes[i] = 0;
085: }
086: isCompact = false;
087: }
088:
089: /**
090: * Constructor for CompactByteArray.
091: * @param indexArray the indicies of the compact array.
092: * @param newValues the values of the compact array.
093: * @exception IllegalArgumentException If index is out of range.
094: */
095: public CompactByteArray(short indexArray[], byte newValues[]) {
096: int i;
097: if (indexArray.length != INDEXCOUNT)
098: throw new IllegalArgumentException("Index out of bounds!");
099: for (i = 0; i < INDEXCOUNT; ++i) {
100: short index = indexArray[i];
101: if ((index < 0) || (index >= newValues.length + BLOCKCOUNT))
102: throw new IllegalArgumentException(
103: "Index out of bounds!");
104: }
105: indices = indexArray;
106: values = newValues;
107: isCompact = true;
108: }
109:
110: /**
111: * Get the mapped value of a Unicode character.
112: * @param index the character to get the mapped value with
113: * @return the mapped value of the given character
114: */
115: public byte elementAt(char index) {
116: return (values[(indices[index >> BLOCKSHIFT] & 0xFFFF)
117: + (index & BLOCKMASK)]);
118: }
119:
120: /**
121: * Set a new value for a Unicode character.
122: * Set automatically expands the array if it is compacted.
123: * @param index the character to set the mapped value with
124: * @param value the new mapped value
125: */
126: public void setElementAt(char index, byte value) {
127: if (isCompact)
128: expand();
129: values[(int) index] = value;
130: touchBlock(index >> BLOCKSHIFT, value);
131: }
132:
133: /**
134: * Set new values for a range of Unicode character.
135: * @param start the starting offset o of the range
136: * @param end the ending offset of the range
137: * @param value the new mapped value
138: */
139: public void setElementAt(char start, char end, byte value) {
140: int i;
141: if (isCompact) {
142: expand();
143: }
144: for (i = start; i <= end; ++i) {
145: values[i] = value;
146: touchBlock(i >> BLOCKSHIFT, value);
147: }
148: }
149:
150: /**
151: *Compact the array.
152: */
153: public void compact() {
154: if (!isCompact) {
155: int limitCompacted = 0;
156: int iBlockStart = 0;
157: short iUntouched = -1;
158:
159: for (int i = 0; i < indices.length; ++i, iBlockStart += BLOCKCOUNT) {
160: indices[i] = -1;
161: boolean touched = blockTouched(i);
162: if (!touched && iUntouched != -1) {
163: // If no values in this block were set, we can just set its
164: // index to be the same as some other block with no values
165: // set, assuming we've seen one yet.
166: indices[i] = iUntouched;
167: } else {
168: int jBlockStart = 0;
169: int j = 0;
170: for (j = 0; j < limitCompacted; ++j, jBlockStart += BLOCKCOUNT) {
171: if (hashes[i] == hashes[j]
172: && arrayRegionMatches(values,
173: iBlockStart, values,
174: jBlockStart, BLOCKCOUNT)) {
175: indices[i] = (short) jBlockStart;
176: break;
177: }
178: }
179: if (indices[i] == -1) {
180: // we didn't match, so copy & update
181: System.arraycopy(values, iBlockStart, values,
182: jBlockStart, BLOCKCOUNT);
183: indices[i] = (short) jBlockStart;
184: hashes[j] = hashes[i];
185: ++limitCompacted;
186:
187: if (!touched) {
188: // If this is the first untouched block we've seen,
189: // remember its index.
190: iUntouched = (short) jBlockStart;
191: }
192: }
193: }
194: }
195: // we are done compacting, so now make the array shorter
196: int newSize = limitCompacted * BLOCKCOUNT;
197: byte[] result = new byte[newSize];
198: System.arraycopy(values, 0, result, 0, newSize);
199: values = result;
200: isCompact = true;
201: hashes = null;
202: }
203: }
204:
205: /**
206: * Convenience utility to compare two arrays of doubles.
207: * @param len the length to compare.
208: * The start indices and start+len must be valid.
209: */
210: final static boolean arrayRegionMatches(byte[] source,
211: int sourceStart, byte[] target, int targetStart, int len) {
212: int sourceEnd = sourceStart + len;
213: int delta = targetStart - sourceStart;
214: for (int i = sourceStart; i < sourceEnd; i++) {
215: if (source[i] != target[i + delta])
216: return false;
217: }
218: return true;
219: }
220:
221: /**
222: * Remember that a specified block was "touched", i.e. had a value set.
223: * Untouched blocks can be skipped when compacting the array
224: */
225: private final void touchBlock(int i, int value) {
226: hashes[i] = (hashes[i] + (value << 1)) | 1;
227: }
228:
229: /**
230: * Query whether a specified block was "touched", i.e. had a value set.
231: * Untouched blocks can be skipped when compacting the array
232: */
233: private final boolean blockTouched(int i) {
234: return hashes[i] != 0;
235: }
236:
237: /** For internal use only. Do not modify the result, the behavior of
238: * modified results are undefined.
239: */
240: public short getIndexArray()[] {
241: return indices;
242: }
243:
244: /** For internal use only. Do not modify the result, the behavior of
245: * modified results are undefined.
246: */
247: public byte getStringArray()[] {
248: return values;
249: }
250:
251: /**
252: * Overrides Cloneable
253: */
254: public Object clone() {
255: try {
256: CompactByteArray other = (CompactByteArray) super .clone();
257: other.values = (byte[]) values.clone();
258: other.indices = (short[]) indices.clone();
259: if (hashes != null)
260: other.hashes = (int[]) hashes.clone();
261: return other;
262: } catch (CloneNotSupportedException e) {
263: throw new InternalError();
264: }
265: }
266:
267: /**
268: * Compares the equality of two compact array objects.
269: * @param obj the compact array object to be compared with this.
270: * @return true if the current compact array object is the same
271: * as the compact array object obj; false otherwise.
272: */
273: public boolean equals(Object obj) {
274: if (obj == null)
275: return false;
276: if (this == obj) // quick check
277: return true;
278: if (getClass() != obj.getClass()) // same class?
279: return false;
280: CompactByteArray other = (CompactByteArray) obj;
281: for (int i = 0; i < UNICODECOUNT; i++) {
282: // could be sped up later
283: if (elementAt((char) i) != other.elementAt((char) i))
284: return false;
285: }
286: return true; // we made it through the guantlet.
287: }
288:
289: /**
290: * Generates the hash code for the compact array object
291: */
292:
293: public int hashCode() {
294: int result = 0;
295: int increment = Math.min(3, values.length / 16);
296: for (int i = 0; i < values.length; i += increment) {
297: result = result * 37 + values[i];
298: }
299: return result;
300: }
301:
302: // --------------------------------------------------------------
303: // package private
304: // --------------------------------------------------------------
305: /**
306: * Expanding takes the array back to a 65536 element array.
307: */
308: private void expand() {
309: int i;
310: if (isCompact) {
311: byte[] tempArray;
312: hashes = new int[INDEXCOUNT];
313: tempArray = new byte[UNICODECOUNT];
314: for (i = 0; i < UNICODECOUNT; ++i) {
315: byte value = elementAt((char) i);
316: tempArray[i] = value;
317: touchBlock(i >> BLOCKSHIFT, value);
318: }
319: for (i = 0; i < INDEXCOUNT; ++i) {
320: indices[i] = (short) (i << BLOCKSHIFT);
321: }
322: values = null;
323: values = tempArray;
324: isCompact = false;
325: }
326: }
327:
328: private byte[] getArray() {
329: return values;
330: }
331:
332: private static final int BLOCKSHIFT = 7;
333: private static final int BLOCKCOUNT = (1 << BLOCKSHIFT);
334: private static final int INDEXSHIFT = (16 - BLOCKSHIFT);
335: private static final int INDEXCOUNT = (1 << INDEXSHIFT);
336: private static final int BLOCKMASK = BLOCKCOUNT - 1;
337:
338: private byte[] values; // char -> short (char parameterized short)
339: private short indices[];
340: private boolean isCompact;
341: private int[] hashes;
342: };
|