001: /*
002: *
003: * @(#)Utility.java 1.17 06/10/10
004: *
005: * Portions Copyright 2000-2006 Sun Microsystems, Inc. All Rights
006: * Reserved. Use is subject to license terms.
007: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU General Public License version
011: * 2 only, as published by the Free Software Foundation.
012: *
013: * This program is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * General Public License version 2 for more details (a copy is
017: * included at /legal/license.txt).
018: *
019: * You should have received a copy of the GNU General Public License
020: * version 2 along with this work; if not, write to the Free Software
021: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022: * 02110-1301 USA
023: *
024: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
025: * Clara, CA 95054 or visit www.sun.com if you need additional
026: * information or have any questions.
027: */
028:
029: /*
030: * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
031: * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
032: *
033: * The original version of this source code and documentation
034: * is copyrighted and owned by Taligent, Inc., a wholly-owned
035: * subsidiary of IBM. These materials are provided under terms
036: * of a License Agreement between Taligent and Sun. This technology
037: * is protected by multiple US and International patents.
038: *
039: * This notice and attribution to Taligent may not be removed.
040: * Taligent is a registered trademark of Taligent, Inc.
041: *
042: */
043:
044: package sun.text;
045:
/**
 * Static helpers for deep array comparison, run-length encoding of primitive
 * arrays into Strings (and back), and formatting Strings as hex or as
 * Java source literals.
 */
public final class Utility {

    /**
     * Compares an Object[] with an arbitrary object, element by element.
     * Elements are compared with {@link #arrayEquals(Object, Object)}, so
     * nested Object[], int[] and double[] elements are compared by content.
     *
     * @param source the array to compare; may be null
     * @param target the object to compare against; may be null
     * @return true if both are null, or if target is an Object[] of the same
     *         length whose elements all match
     */
    public final static boolean arrayEquals(Object[] source, Object target) {
        if (source == null)
            return (target == null);
        if (!(target instanceof Object[]))
            return false;
        Object[] targ = (Object[]) target;
        return (source.length == targ.length
                && arrayRegionMatches(source, 0, targ, 0, source.length));
    }

    /**
     * Compares an int[] with an arbitrary object.
     *
     * @param source the array to compare; may be null
     * @param target the object to compare against; may be null
     * @return true if both are null, or if target is an int[] of the same
     *         length holding the same values
     */
    public final static boolean arrayEquals(int[] source, Object target) {
        if (source == null)
            return (target == null);
        if (!(target instanceof int[]))
            return false;
        int[] targ = (int[]) target;
        return (source.length == targ.length
                && arrayRegionMatches(source, 0, targ, 0, source.length));
    }

    /**
     * Compares a double[] with an arbitrary object. Values are compared with
     * the != operator, so a NaN element never matches anything.
     *
     * @param source the array to compare; may be null
     * @param target the object to compare against; may be null
     * @return true if both are null, or if target is a double[] of the same
     *         length holding the same values
     */
    public final static boolean arrayEquals(double[] source, Object target) {
        if (source == null)
            return (target == null);
        if (!(target instanceof double[]))
            return false;
        double[] targ = (double[]) target;
        return (source.length == targ.length
                && arrayRegionMatches(source, 0, targ, 0, source.length));
    }

    /**
     * Compares two objects, treating Object[], int[] and double[] sources as
     * arrays to be compared by content and everything else via equals().
     *
     * @param source the first object; may be null
     * @param target the second object; may be null
     * @return true if both are null or if they are equal as described above
     */
    public final static boolean arrayEquals(Object source, Object target) {
        if (source == null)
            return (target == null);
        // Overload resolution is static, so dispatch on the runtime type
        // by hand to reach the array-specific overloads.
        if (source instanceof Object[])
            return (arrayEquals((Object[]) source, target));
        if (source instanceof int[])
            return (arrayEquals((int[]) source, target));
        if (source instanceof double[])
            // Must cast to double[]; casting to int[] here would throw
            // ClassCastException for every double[] source.
            return (arrayEquals((double[]) source, target));
        return source.equals(target);
    }

    /**
     * Compares a region of one Object[] with a region of another, using
     * {@link #arrayEquals(Object, Object)} on each pair of elements.
     *
     * @param source the first array
     * @param sourceStart index of the first element to compare in source
     * @param target the second array
     * @param targetStart index of the first element to compare in target
     * @param len the number of elements to compare.
     * The start indices and start+len must be valid.
     * @return true if all len element pairs match
     */
    public final static boolean arrayRegionMatches(Object[] source,
            int sourceStart, Object[] target, int targetStart, int len) {
        int sourceEnd = sourceStart + len;
        int delta = targetStart - sourceStart;
        for (int i = sourceStart; i < sourceEnd; i++) {
            if (!arrayEquals(source[i], target[i + delta]))
                return false;
        }
        return true;
    }

    /**
     * Compares a region of one int[] with a region of another.
     *
     * @param source the first array
     * @param sourceStart index of the first element to compare in source
     * @param target the second array
     * @param targetStart index of the first element to compare in target
     * @param len the number of elements to compare.
     * The start indices and start+len must be valid.
     * @return true if all len element pairs are equal
     */
    public final static boolean arrayRegionMatches(int[] source,
            int sourceStart, int[] target, int targetStart, int len) {
        int sourceEnd = sourceStart + len;
        int delta = targetStart - sourceStart;
        for (int i = sourceStart; i < sourceEnd; i++) {
            if (source[i] != target[i + delta])
                return false;
        }
        return true;
    }

    /**
     * Compares a region of one double[] with a region of another. Values are
     * compared with the != operator.
     *
     * @param source the first array
     * @param sourceStart index of the first element to compare in source
     * @param target the second array
     * @param targetStart index of the first element to compare in target
     * @param len the number of elements to compare.
     * The start indices and start+len must be valid.
     * @return true if all len element pairs are equal
     */
    public final static boolean arrayRegionMatches(double[] source,
            int sourceStart, double[] target, int targetStart, int len) {
        int sourceEnd = sourceStart + len;
        int delta = targetStart - sourceStart;
        for (int i = sourceStart; i < sourceEnd; i++) {
            if (source[i] != target[i + delta])
                return false;
        }
        return true;
    }

    /**
     * Null-safe equals.
     *
     * @param source the first object; may be null
     * @param target the second object; may be null
     * @return true if both are null, or if source.equals(target)
     */
    public final static boolean objectEquals(Object source, Object target) {
        if (source == null)
            return (target == null);
        else
            return source.equals(target);
    }

    /**
     * The ESCAPE character is used during run-length encoding. It signals
     * a run of identical chars.
     */
    static final char ESCAPE = '\uA5A5';

    /**
     * The ESCAPE_BYTE character is used during run-length encoding. It signals
     * a run of identical bytes.
     */
    static final byte ESCAPE_BYTE = (byte) 0xA5;

    /**
     * Construct a string representing a short array. Use run-length encoding.
     * The first two characters hold the array length (high 16 bits, then low
     * 16 bits). After that, a character represents itself, unless it is the
     * ESCAPE character. Then the following notations are possible:
     *   ESCAPE ESCAPE   ESCAPE literal
     *   ESCAPE n c      n instances of character c
     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
     * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
     * If we encounter a run where n == ESCAPE, we represent this as:
     *   c ESCAPE n-1 c
     * The ESCAPE value is chosen so as not to collide with commonly
     * seen values.
     *
     * @param a the array to encode; an empty array yields just the header
     * @return the encoded string, readable by RLEStringToShortArray
     */
    public static final String arrayToRLEString(short[] a) {
        StringBuffer buffer = new StringBuffer();
        buffer.append((char) (a.length >> 16));
        buffer.append((char) a.length);
        if (a.length == 0) {
            // Nothing to encode; the header alone round-trips to an
            // empty array.
            return buffer.toString();
        }
        short runValue = a[0];
        int runLength = 1;
        for (int i = 1; i < a.length; ++i) {
            short s = a[i];
            // Cap runs at 0xFFFF so the count fits in one char.
            if (s == runValue && runLength < 0xFFFF) {
                ++runLength;
            } else {
                encodeRun(buffer, runValue, runLength);
                runValue = s;
                runLength = 1;
            }
        }
        encodeRun(buffer, runValue, runLength);
        return buffer.toString();
    }

    /**
     * Construct a string representing a byte array. Use run-length encoding.
     * The first two characters hold the array length (high 16 bits, then low
     * 16 bits). Two bytes are packed into a single char, with a single extra
     * zero byte at the end if needed. A byte represents itself, unless it is
     * the ESCAPE_BYTE. Then the following notations are possible:
     *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
     *   ESCAPE_BYTE n b           n instances of byte b
     * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
     * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
     * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
     *   b ESCAPE_BYTE n-1 b
     * The ESCAPE_BYTE value is chosen so as not to collide with commonly
     * seen values.
     *
     * @param a the array to encode; an empty array yields just the header
     * @return the encoded string, readable by RLEStringToByteArray
     */
    public static final String arrayToRLEString(byte[] a) {
        StringBuffer buffer = new StringBuffer();
        buffer.append((char) (a.length >> 16));
        buffer.append((char) a.length);
        if (a.length == 0) {
            // Nothing to encode; the header alone round-trips to an
            // empty array.
            return buffer.toString();
        }
        byte runValue = a[0];
        int runLength = 1;
        // Carries the pending first byte of a char between append calls.
        byte[] state = new byte[2];
        for (int i = 1; i < a.length; ++i) {
            byte b = a[i];
            // Cap runs at 0xFF so the count fits in one byte.
            if (b == runValue && runLength < 0xFF) {
                ++runLength;
            } else {
                encodeRun(buffer, runValue, runLength, state);
                runValue = b;
                runLength = 1;
            }
        }
        encodeRun(buffer, runValue, runLength, state);

        // We must save the final byte, if there is one, by padding
        // an extra zero.
        if (state[0] != 0) {
            appendEncodedByte(buffer, (byte) 0, state);
        }

        return buffer.toString();
    }

    /**
     * Construct a string representing a char array. Use run-length encoding.
     * The first two characters hold the array length (high 16 bits, then low
     * 16 bits). After that, a character represents itself, unless it is the
     * ESCAPE character. Then the following notations are possible:
     *   ESCAPE ESCAPE   ESCAPE literal
     *   ESCAPE n c      n instances of character c
     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
     * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
     * If we encounter a run where n == ESCAPE, we represent this as:
     *   c ESCAPE n-1 c
     * The ESCAPE value is chosen so as not to collide with commonly
     * seen values.
     *
     * @param a the array to encode; an empty array yields just the header
     * @return the encoded string, readable by RLEStringToCharArray
     */
    public static final String arrayToRLEString(char[] a) {
        StringBuffer buffer = new StringBuffer();
        buffer.append((char) (a.length >> 16));
        buffer.append((char) a.length);
        if (a.length == 0) {
            // Nothing to encode; the header alone round-trips to an
            // empty array.
            return buffer.toString();
        }
        char runValue = a[0];
        int runLength = 1;
        for (int i = 1; i < a.length; ++i) {
            char s = a[i];
            // Cap runs at 0xFFFF so the count fits in one char.
            if (s == runValue && runLength < 0xFFFF) {
                ++runLength;
            } else {
                // The short overload of encodeRun writes the same 16 bits.
                encodeRun(buffer, (short) runValue, runLength);
                runValue = s;
                runLength = 1;
            }
        }
        encodeRun(buffer, (short) runValue, runLength);
        return buffer.toString();
    }

    /**
     * Construct a string representing an int array. Use run-length encoding.
     * Every int, including the leading array length, is written as two
     * characters (high 16 bits, then low 16 bits). An int represents itself,
     * unless it equals the ESCAPE value. Then the following notations are
     * possible:
     *   ESCAPE ESCAPE   ESCAPE literal
     *   ESCAPE n v      n instances of int v
     * Since an encoded run occupies 3 ints, we only encode runs of 4 or
     * more values. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
     * If we encounter a run where n == ESCAPE, we represent this as:
     *   v ESCAPE n-1 v
     * The ESCAPE value is chosen so as not to collide with commonly
     * seen values.
     *
     * @param a the array to encode; an empty array yields just the header
     * @return the encoded string, readable by RLEStringToIntArray
     */
    public static final String arrayToRLEString(int[] a) {
        StringBuffer buffer = new StringBuffer();

        appendInt(buffer, a.length);
        if (a.length == 0) {
            // Nothing to encode; the header alone round-trips to an
            // empty array.
            return buffer.toString();
        }
        int runValue = a[0];
        int runLength = 1;
        for (int i = 1; i < a.length; ++i) {
            int s = a[i];
            if (s == runValue && runLength < 0xFFFF) {
                ++runLength;
            } else {
                encodeRun(buffer, runValue, runLength);
                runValue = s;
                runLength = 1;
            }
        }
        encodeRun(buffer, runValue, runLength);
        return buffer.toString();
    }

    /**
     * Encode a run, possibly a degenerate run (of < 4 values).
     *
     * @param buffer receives the encoded characters
     * @param value the 16-bit value being repeated
     * @param length The length of the run; must be > 0 && <= 0xFFFF.
     */
    private static final void encodeRun(StringBuffer buffer, short value,
            int length) {
        // Compare as shorts: a short promoted to int is sign-extended, so
        // comparing it with (int) ESCAPE (a positive number) would never
        // match and the ESCAPE value would go out unescaped.
        boolean valueIsEscape = (value == (short) ESCAPE);
        if (length < 4) {
            for (int j = 0; j < length; ++j) {
                if (valueIsEscape) {
                    buffer.append(ESCAPE);
                }
                buffer.append((char) value);
            }
        } else {
            if (length == (int) ESCAPE) {
                // A count equal to ESCAPE would read back as an escaped
                // literal, so emit one value on its own and shorten the run.
                if (valueIsEscape) {
                    buffer.append(ESCAPE);
                }
                buffer.append((char) value);
                --length;
            }
            buffer.append(ESCAPE);
            buffer.append((char) length);
            buffer.append((char) value); // Don't need to escape this value
        }
    }

    /**
     * Encode a run, possibly a degenerate run (of < 4 values).
     *
     * @param buffer receives the packed characters
     * @param value the byte being repeated
     * @param length The length of the run; must be > 0 && <= 0xFF.
     * @param state the byte-packing state shared with appendEncodedByte
     */
    private static final void encodeRun(StringBuffer buffer, byte value,
            int length, byte[] state) {
        if (length < 4) {
            for (int j = 0; j < length; ++j) {
                if (value == ESCAPE_BYTE)
                    appendEncodedByte(buffer, ESCAPE_BYTE, state);
                appendEncodedByte(buffer, value, state);
            }
        } else {
            // length is an unsigned count (4..0xFF) while ESCAPE_BYTE is a
            // negative signed byte, so mask before comparing.
            if (length == (ESCAPE_BYTE & 0xFF)) {
                // A count equal to ESCAPE_BYTE would read back as an escaped
                // literal, so emit one value on its own and shorten the run.
                if (value == ESCAPE_BYTE) {
                    appendEncodedByte(buffer, ESCAPE_BYTE, state);
                }
                appendEncodedByte(buffer, value, state);
                --length;
            }
            appendEncodedByte(buffer, ESCAPE_BYTE, state);
            appendEncodedByte(buffer, (byte) length, state);
            appendEncodedByte(buffer, value, state); // Don't need to escape this value
        }
    }

    /**
     * Encode a run, possibly a degenerate run (of < 4 values).
     *
     * @param buffer receives the encoded characters (two per int)
     * @param value the int being repeated
     * @param length The length of the run; must be > 0 && <= 0xFFFF.
     */
    private static final void encodeRun(StringBuffer buffer, int value,
            int length) {
        if (length < 4) {
            for (int j = 0; j < length; ++j) {
                if (value == ESCAPE) {
                    appendInt(buffer, ESCAPE);
                }
                appendInt(buffer, value);
            }
        } else {
            if (length == (int) ESCAPE) {
                // A count equal to ESCAPE would read back as an escaped
                // literal, so emit one value on its own and shorten the run.
                if (value == (int) ESCAPE) {
                    appendInt(buffer, ESCAPE);
                }
                appendInt(buffer, value);
                --length;
            }
            appendInt(buffer, ESCAPE);
            appendInt(buffer, length);
            appendInt(buffer, value); // Don't need to escape this value
        }
    }

    /**
     * Append an int to the buffer as two characters: the high 16 bits
     * followed by the low 16 bits.
     */
    private static final void appendInt(StringBuffer buffer, int value) {
        buffer.append((char) (value >>> 16));
        buffer.append((char) (value & 0xFFFF));
    }

    /**
     * Append a byte to the given StringBuffer, packing two bytes into each
     * character. The state parameter maintains intermediary data between
     * calls.
     *
     * @param buffer receives a character once two bytes have been supplied
     * @param value the byte to append
     * @param state A two-element array, with state[0] == 0 if this is the
     * first byte of a pair, or state[0] != 0 if this is the second byte
     * of a pair, in which case state[1] is the first byte.
     */
    private static final void appendEncodedByte(StringBuffer buffer,
            byte value, byte[] state) {
        if (state[0] != 0) {
            // First byte goes in the high half, this byte in the low half.
            char c = (char) ((state[1] << 8) | (((int) value) & 0xFF));
            buffer.append(c);
            state[0] = 0;
        } else {
            state[0] = 1;
            state[1] = value;
        }
    }

    /**
     * Construct an array of shorts from a run-length encoded string.
     *
     * @param s a string in the format written by arrayToRLEString(short[])
     * @return the decoded array
     * @throws InternalError if the number of decoded values differs from the
     *         length stored in the header
     */
    public static final short[] RLEStringToShortArray(String s) {
        int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
        short[] array = new short[length];
        int ai = 0;
        for (int i = 2; i < s.length(); ++i) {
            char c = s.charAt(i);
            if (c == ESCAPE) {
                c = s.charAt(++i);
                if (c == ESCAPE) {
                    // Doubled ESCAPE: a literal ESCAPE value.
                    array[ai++] = (short) c;
                } else {
                    int runLength = (int) c;
                    short runValue = (short) s.charAt(++i);
                    for (int j = 0; j < runLength; ++j) {
                        array[ai++] = runValue;
                    }
                }
            } else {
                array[ai++] = (short) c;
            }
        }

        if (ai != length) {
            throw new InternalError(
                    "Bad run-length encoded short array");
        }

        return array;
    }

    /**
     * Construct an array of bytes from a run-length encoded string.
     *
     * @param s a string in the format written by arrayToRLEString(byte[])
     * @return the decoded array
     * @throws InternalError if the data ends in the middle of an escape
     *         sequence, or if characters remain after the array is full
     */
    public static final byte[] RLEStringToByteArray(String s) {
        int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
        byte[] array = new byte[length];
        boolean nextChar = true;
        char c = 0;
        int node = 0;
        int runLength = 0;
        int i = 2;
        for (int ai = 0; ai < length;) {
            // This part of the loop places the next byte into the local
            // variable 'b' each time through the loop. It keeps the
            // current character in 'c' and uses the boolean 'nextChar'
            // to see if we've taken both bytes out of 'c' yet.
            byte b;
            if (nextChar) {
                c = s.charAt(i++);
                b = (byte) (c >> 8);
                nextChar = false;
            } else {
                b = (byte) (c & 0xFF);
                nextChar = true;
            }

            // This part of the loop is a tiny state machine which handles
            // the parsing of the run-length encoding. This would be simpler
            // if we could look ahead, but we can't, so we use 'node' to
            // move between three nodes in the state machine.
            switch (node) {
            case 0:
                // Normal idle node
                if (b == ESCAPE_BYTE) {
                    node = 1;
                } else {
                    array[ai++] = b;
                }
                break;
            case 1:
                // We have seen one ESCAPE_BYTE; we expect either a second
                // one, or a run length and value.
                if (b == ESCAPE_BYTE) {
                    array[ai++] = ESCAPE_BYTE;
                    node = 0;
                } else {
                    runLength = b;
                    // Interpret signed byte as unsigned
                    if (runLength < 0) {
                        runLength += 0x100;
                    }
                    node = 2;
                }
                break;
            case 2:
                // We have seen an ESCAPE_BYTE and length byte. We interpret
                // the next byte as the value to be repeated.
                for (int j = 0; j < runLength; ++j) {
                    array[ai++] = b;
                }
                node = 0;
                break;
            }
        }

        if (node != 0) {
            throw new InternalError("Bad run-length encoded byte array");
        }

        if (i != s.length()) {
            throw new InternalError(
                    "Excess data in RLE byte array string");
        }

        return array;
    }

    /**
     * Construct an array of chars from a run-length encoded string.
     *
     * @param s a string in the format written by arrayToRLEString(char[])
     * @return the decoded array
     * @throws InternalError if the number of decoded values differs from the
     *         length stored in the header
     */
    public static final char[] RLEStringToCharArray(String s) {
        int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
        char[] array = new char[length];
        int ai = 0;
        for (int i = 2; i < s.length(); ++i) {
            char c = s.charAt(i);
            if (c == ESCAPE) {
                c = s.charAt(++i);
                if (c == ESCAPE) {
                    // Doubled ESCAPE: a literal ESCAPE value.
                    array[ai++] = c;
                } else {
                    int runLength = (int) c;
                    char runValue = s.charAt(++i);
                    for (int j = 0; j < runLength; ++j) {
                        array[ai++] = runValue;
                    }
                }
            } else {
                array[ai++] = c;
            }
        }

        if (ai != length) {
            throw new InternalError(
                    "Bad run-length encoded char array");
        }

        return array;
    }

    /**
     * Construct an array of ints from a run-length encoded string.
     *
     * @param s a string in the format written by arrayToRLEString(int[])
     * @return the decoded array
     * @throws InternalError if the decoded count differs from the length in
     *         the header, or if the string is not consumed exactly
     */
    public static final int[] RLEStringToIntArray(String s) {
        int length = getInt(s, 0);
        int[] array = new int[length];
        // i counts ints (pairs of chars); int 0 is the length header.
        int ai = 0, i = 1;

        int maxI = s.length() / 2;
        while (ai < length && i < maxI) {
            int c = getInt(s, i++);

            if (c == ESCAPE) {
                c = getInt(s, i++);
                if (c == ESCAPE) {
                    // Doubled ESCAPE: a literal ESCAPE value.
                    array[ai++] = c;
                } else {
                    int runLength = c;
                    int runValue = getInt(s, i++);
                    for (int j = 0; j < runLength; ++j) {
                        array[ai++] = runValue;
                    }
                }
            } else {
                array[ai++] = c;
            }
        }

        if (ai != length || i != maxI) {
            throw new InternalError("Bad run-length encoded int array");
        }

        return array;
    }

    /**
     * Format a String for representation in a source file. This includes
     * breaking it into lines and escaping characters when necessary:
     * control characters, the double quote and the backslash are written in
     * octal notation, and characters above 0x7E as Unicode escapes.
     *
     * @param s the string to format
     * @return one quoted literal per line, with the lines joined by a
     *         trailing "+" and a newline; empty if s is empty
     */
    public static final String formatForSource(String s) {
        StringBuffer buffer = new StringBuffer();
        for (int i = 0; i < s.length();) {
            if (i > 0)
                buffer.append("+\n");
            buffer.append(" \"");
            // Running estimate of the output column, used to wrap lines.
            int count = 11;
            while (i < s.length() && count < 80) {
                char c = s.charAt(i++);
                if (c < '\u0020' || c == '"' || c == '\\') {
                    // Represent control characters, the double quote and
                    // the backslash using octal notation; otherwise the
                    // string we form won't compile (or will change
                    // meaning), since Unicode escape sequences are
                    // processed before tokenization.
                    buffer.append('\\');
                    buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
                    buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
                    buffer.append(HEX_DIGIT[(c & 0007)]);
                    count += 4;
                } else if (c <= '\u007E') {
                    buffer.append(c);
                    count += 1;
                } else {
                    buffer.append("\\u");
                    buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
                    buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
                    buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
                    buffer.append(HEX_DIGIT[(c & 0x000F)]);
                    count += 6;
                }
            }
            buffer.append('"');
        }
        return buffer.toString();
    }

    /**
     * Format a char as four upper-case hex digits.
     *
     * @param ch the character to format
     * @return the four-digit hex form of ch
     */
    public static final String hex(char ch) {
        StringBuffer buff = new StringBuffer();
        return hex(ch, buff).toString();
    }

    /**
     * Append the hex form of every char in src to buff, four digits per
     * char, separated by commas. Nothing is appended if src is empty.
     *
     * @param src the string to format; if null, nothing is appended
     * @param buff the buffer to append to; if null, nothing is done
     * @return buff
     */
    public static final StringBuffer hex(String src, StringBuffer buff) {
        if (src != null && buff != null) {
            int strLen = src.length();
            for (int x = 0; x < strLen; x++) {
                // Comma goes between elements, never before the first.
                if (x > 0) {
                    buff.append(',');
                }
                hex(src.charAt(x), buff);
            }
        }
        return buff;
    }

    /**
     * Format every char of a string as four hex digits, comma separated.
     *
     * @param str the string to format; null yields an empty result
     * @return the comma-separated hex form of str
     */
    public static final String hex(String str) {
        StringBuffer buff = new StringBuffer();
        hex(str, buff);
        return buff.toString();
    }

    /**
     * Format the current contents of a StringBuffer as comma-separated hex.
     * The buffer itself is not modified.
     *
     * @param buff the buffer whose contents are formatted
     * @return the comma-separated hex form of the buffer contents
     */
    public static final String hex(StringBuffer buff) {
        return hex(buff.toString());
    }

    /**
     * Append a char to buff as four upper-case hex digits.
     *
     * @param ch the character to format
     * @param buff the buffer to append to
     * @return buff
     */
    public static final StringBuffer hex(char ch, StringBuffer buff) {
        // Walk the four nibbles from most to least significant.
        for (int shift = 12; shift >= 0; shift -= 4) {
            buff.append(HEX_DIGIT[(ch >> shift) & 0x0F]);
        }
        return buff;
    }

    /**
     * Read the i-th int from a string in which each int occupies two
     * consecutive chars (high 16 bits first).
     *
     * @param s the string to read from
     * @param i the index of the int, counted in ints rather than chars
     * @return the int assembled from chars 2*i and 2*i+1
     */
    static final int getInt(String s, int i) {
        return (((int) s.charAt(2 * i)) << 16)
                | (int) s.charAt(2 * i + 1);
    }

    /** Upper-case digits for hex (and, using the first eight, octal). */
    static final char[] HEX_DIGIT = { '0', '1', '2', '3', '4', '5',
            '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

}
|