001: /* Copyright (c) 1995-2000, The Hypersonic SQL Group.
002: * All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * Redistributions of source code must retain the above copyright notice, this
008: * list of conditions and the following disclaimer.
009: *
010: * Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: *
014: * Neither the name of the Hypersonic SQL Group nor the names of its
015: * contributors may be used to endorse or promote products derived from this
016: * software without specific prior written permission.
017: *
018: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
019: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
020: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
021: * ARE DISCLAIMED. IN NO EVENT SHALL THE HYPERSONIC SQL GROUP,
022: * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
023: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
024: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
026: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029: *
030: * This software consists of voluntary contributions made by many individuals
031: * on behalf of the Hypersonic SQL Group.
032: *
033: *
034: * For work added by the HSQL Development Group:
035: *
036: * Copyright (c) 2001-2005, The HSQL Development Group
037: * All rights reserved.
038: *
039: * Redistribution and use in source and binary forms, with or without
040: * modification, are permitted provided that the following conditions are met:
041: *
042: * Redistributions of source code must retain the above copyright notice, this
043: * list of conditions and the following disclaimer.
044: *
045: * Redistributions in binary form must reproduce the above copyright notice,
046: * this list of conditions and the following disclaimer in the documentation
047: * and/or other materials provided with the distribution.
048: *
049: * Neither the name of the HSQL Development Group nor the names of its
050: * contributors may be used to endorse or promote products derived from this
051: * software without specific prior written permission.
052: *
053: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
054: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
055: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
056: * ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
057: * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
058: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
059: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
060: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
061: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
062: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
063: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
064: */
065:
066: package org.hsqldb.lib;
067:
068: import java.io.IOException;
069: import java.io.InputStream;
070: import java.io.InputStreamReader;
071: import java.io.StringWriter;
072: import java.io.UTFDataFormatException;
073:
074: /**
075: * Collection of static methods for converting strings between different
076: * formats and to and from byte arrays.<p>
077: *
078: * New class, with extensively enhanced and rewritten Hypersonic code.
079: *
080: * @author Thomas Mueller (Hypersonic SQL Group)
081: * @author fredt@users
082: * @version 1.8.0
083: * @since 1.7.2
084: */
085:
086: // fredt@users 20020328 - patch 1.7.0 by fredt - error trapping
087: public class StringConverter {
088:
089: private static final byte[] HEXBYTES = { (byte) '0', (byte) '1',
090: (byte) '2', (byte) '3', (byte) '4', (byte) '5', (byte) '6',
091: (byte) '7', (byte) '8', (byte) '9', (byte) 'a', (byte) 'b',
092: (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f' };
093: private static final String HEXINDEX = "0123456789abcdef0123456789ABCDEF";
094:
095: /**
096: * Converts a String into a byte array by using a big-endian two byte
097: * representation of each char value in the string.
098: */
099: byte[] stringToFullByteArray(String s) {
100:
101: int length = s.length();
102: byte[] buffer = new byte[length * 2];
103: int c;
104:
105: for (int i = 0; i < length; i++) {
106: c = s.charAt(i);
107: buffer[i * 2] = (byte) ((c & 0x0000ff00) >> 8);
108: buffer[i * 2 + 1] = (byte) (c & 0x000000ff);
109: }
110:
111: return buffer;
112: }
113:
114: /**
115: * Compacts a hexadecimal string into a byte array
116: *
117: *
118: * @param s hexadecimal string
119: *
120: * @return byte array for the hex string
121: * @throws IOException
122: */
123: public static byte[] hexToByte(String s) throws IOException {
124:
125: int l = s.length() / 2;
126: byte[] data = new byte[l];
127: int j = 0;
128:
129: if (s.length() % 2 != 0) {
130: throw new IOException(
131: "hexadecimal string with odd number of characters");
132: }
133:
134: for (int i = 0; i < l; i++) {
135: char c = s.charAt(j++);
136: int n, b;
137:
138: n = HEXINDEX.indexOf(c);
139:
140: if (n == -1) {
141: throw new IOException(
142: "hexadecimal string contains non hex character");
143: }
144:
145: b = (n & 0xf) << 4;
146: c = s.charAt(j++);
147: n = HEXINDEX.indexOf(c);
148: b += (n & 0xf);
149: data[i] = (byte) b;
150: }
151:
152: return data;
153: }
154:
155: /**
156: * Converts a byte array into a hexadecimal string
157: *
158: *
159: * @param b byte array
160: *
161: * @return hex string
162: */
163: public static String byteToHex(byte[] b) {
164:
165: int len = b.length;
166: char[] s = new char[len * 2];
167:
168: for (int i = 0, j = 0; i < len; i++) {
169: int c = ((int) b[i]) & 0xff;
170:
171: s[j++] = (char) HEXBYTES[c >> 4 & 0xf];
172: s[j++] = (char) HEXBYTES[c & 0xf];
173: }
174:
175: return new String(s);
176: }
177:
178: /**
179: * Converts a byte array into hexadecimal characters
180: * which are written as ASCII to the given output stream.
181: *
182: * @param o output stream
183: * @param b byte array
184: */
185: public static void writeHex(byte[] o, int from, byte[] b) {
186:
187: int len = b.length;
188:
189: for (int i = 0; i < len; i++) {
190: int c = ((int) b[i]) & 0xff;
191:
192: o[from++] = HEXBYTES[c >> 4 & 0xf];
193: o[from++] = HEXBYTES[c & 0xf];
194: }
195: }
196:
197: public static String byteToString(byte[] b, String charset) {
198:
199: try {
200: return (charset == null) ? new String(b) : new String(b,
201: charset);
202: } catch (Exception e) {
203: }
204:
205: return null;
206: }
207:
208: /**
209: * Converts a Unicode string into UTF8 then convert into a hex string
210: *
211: *
212: * @param s normal Unicode string
213: *
214: * @return hex string representation of UTF8 encoding of the input
215: */
216: public static String unicodeToHexString(String s) {
217:
218: HsqlByteArrayOutputStream bout = new HsqlByteArrayOutputStream();
219:
220: writeUTF(s, bout);
221:
222: return byteToHex(bout.toByteArray());
223: }
224:
225: // fredt@users 20011120 - patch 450455 by kibu@users - modified
226: // method return type changed to HsqlStringBuffer with spare
227: // space for end-of-line characters -- to reduce String concatenation
228:
229: /**
230: * Hsqldb specific encoding used only for log files.
231: *
232: * The SQL statements that need to be written to the log file (input) are
233: * Java Unicode strings. input is converted into a 7bit escaped ASCII
234: * string (output)with the following transformations.
235: * All characters outside the 0x20-7f range are converted to a
236: * escape sequence and added to output.
237: * If a backslash character is immdediately followed by 'u', the
238: * backslash character is converted to escape sequence and
239: * added to output.
240: * All the remaining characters in input are added to output without
241: * conversion.
242: *
243: * The escape sequence is backslash, letter u, xxxx, where xxxx
244: * is the hex representation of the character code.
245: * (fredt@users)
246: *
247: * @param b output stream to wite to
248: * @param s Java Unicode string
249: *
250: * @return number of bytes written out
251: *
252: */
253: public static int unicodeToAscii(HsqlByteArrayOutputStream b,
254: String s, boolean doubleSingleQuotes) {
255:
256: int count = 0;
257:
258: if ((s == null) || (s.length() == 0)) {
259: return 0;
260: }
261:
262: int len = s.length();
263:
264: for (int i = 0; i < len; i++) {
265: char c = s.charAt(i);
266:
267: if (c == '\\') {
268: if ((i < len - 1) && (s.charAt(i + 1) == 'u')) {
269: b.write(c); // encode the \ as unicode, so 'u' is ignored
270: b.write('u');
271: b.write('0');
272: b.write('0');
273: b.write('5');
274: b.write('c');
275:
276: count += 6;
277: } else {
278: b.write(c);
279:
280: count++;
281: }
282: } else if ((c >= 0x0020) && (c <= 0x007f)) {
283: b.write(c); // this is 99%
284:
285: count++;
286:
287: if (c == '\'' && doubleSingleQuotes) {
288: b.write(c);
289:
290: count++;
291: }
292: } else {
293: b.write('\\');
294: b.write('u');
295: b.write(HEXBYTES[(c >> 12) & 0xf]);
296: b.write(HEXBYTES[(c >> 8) & 0xf]);
297: b.write(HEXBYTES[(c >> 4) & 0xf]);
298: b.write(HEXBYTES[c & 0xf]);
299:
300: count += 6;
301: }
302: }
303:
304: return count;
305: }
306:
307: // fredt@users 20020522 - fix for 557510 - backslash bug
308: // this legacy bug resulted from forward reading the input when a backslash
309: // was present and manifested itself when a backslash was followed
310: // immdediately by a character outside the 0x20-7f range in a database field.
311:
312: /**
313: * Hsqldb specific decoding used only for log files.
314: *
315: * This method converts the 7 bit escaped ASCII strings in a log file
316: * back into Java Unicode strings. See unicodeToAccii() above,
317: *
318: * @param s encoded ASCII string in byte array
319: * @param offset position of first byte
320: * @param length number of bytes to use
321: *
322: * @return Java Unicode string
323: */
324: public static String asciiToUnicode(byte[] s, int offset, int length) {
325:
326: if (length == 0) {
327: return "";
328: }
329:
330: char[] b = new char[length];
331: int j = 0;
332:
333: for (int i = 0; i < length; i++) {
334: byte c = s[offset + i];
335:
336: if (c == '\\' && i < length - 5) {
337: byte c1 = s[offset + i + 1];
338:
339: if (c1 == 'u') {
340: i++;
341:
342: // 4 characters read should always return 0-15
343: int k = HEXINDEX.indexOf(s[offset + (++i)]) << 12;
344:
345: k += HEXINDEX.indexOf(s[offset + (++i)]) << 8;
346: k += HEXINDEX.indexOf(s[offset + (++i)]) << 4;
347: k += HEXINDEX.indexOf(s[offset + (++i)]);
348: b[j++] = (char) k;
349: } else {
350: b[j++] = (char) c;
351: }
352: } else {
353: b[j++] = (char) c;
354: }
355: }
356:
357: return new String(b, 0, j);
358: }
359:
360: public static String asciiToUnicode(String s) {
361:
362: if ((s == null) || (s.indexOf("\\u") == -1)) {
363: return s;
364: }
365:
366: int len = s.length();
367: char[] b = new char[len];
368: int j = 0;
369:
370: for (int i = 0; i < len; i++) {
371: char c = s.charAt(i);
372:
373: if (c == '\\' && i < len - 5) {
374: char c1 = s.charAt(i + 1);
375:
376: if (c1 == 'u') {
377: i++;
378:
379: // 4 characters read should always return 0-15
380: int k = HEXINDEX.indexOf(s.charAt(++i)) << 12;
381:
382: k += HEXINDEX.indexOf(s.charAt(++i)) << 8;
383: k += HEXINDEX.indexOf(s.charAt(++i)) << 4;
384: k += HEXINDEX.indexOf(s.charAt(++i));
385: b[j++] = (char) k;
386: } else {
387: b[j++] = c;
388: }
389: } else {
390: b[j++] = c;
391: }
392: }
393:
394: return new String(b, 0, j);
395: }
396:
397: public static String readUTF(byte[] bytearr, int offset, int length)
398: throws IOException {
399:
400: char[] buf = new char[length];
401:
402: return readUTF(bytearr, offset, length, buf);
403: }
404:
405: public static String readUTF(byte[] bytearr, int offset,
406: int length, char[] buf) throws IOException {
407:
408: int bcount = 0;
409: int c, char2, char3;
410: int count = 0;
411:
412: while (count < length) {
413: c = (int) bytearr[offset + count];
414:
415: if (bcount == buf.length) {
416: buf = (char[]) ArrayUtil.resizeArray(buf, length);
417: }
418:
419: if (c > 0) {
420:
421: /* 0xxxxxxx*/
422: count++;
423:
424: buf[bcount++] = (char) c;
425:
426: continue;
427: }
428:
429: c &= 0xff;
430:
431: switch (c >> 4) {
432:
433: case 12:
434: case 13:
435:
436: /* 110x xxxx 10xx xxxx*/
437: count += 2;
438:
439: if (count > length) {
440: throw new UTFDataFormatException();
441: }
442:
443: char2 = (int) bytearr[offset + count - 1];
444:
445: if ((char2 & 0xC0) != 0x80) {
446: throw new UTFDataFormatException();
447: }
448:
449: buf[bcount++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
450: break;
451:
452: case 14:
453:
454: /* 1110 xxxx 10xx xxxx 10xx xxxx */
455: count += 3;
456:
457: if (count > length) {
458: throw new UTFDataFormatException();
459: }
460:
461: char2 = (int) bytearr[offset + count - 2];
462: char3 = (int) bytearr[offset + count - 1];
463:
464: if (((char2 & 0xC0) != 0x80)
465: || ((char3 & 0xC0) != 0x80)) {
466: throw new UTFDataFormatException();
467: }
468:
469: buf[bcount++] = (char) (((c & 0x0F) << 12)
470: | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0));
471: break;
472:
473: default:
474:
475: /* 10xx xxxx, 1111 xxxx */
476: throw new UTFDataFormatException();
477: }
478: }
479:
480: // The number of chars produced may be less than length
481: return new String(buf, 0, bcount);
482: }
483:
484: /**
485: * Writes a string to the specified DataOutput using UTF-8 encoding in a
486: * machine-independent manner.
487: * <p>
488: * @param str a string to be written.
489: * @param out destination to write to
490: * @return The number of bytes written out.
491: */
492: public static int writeUTF(String str, HsqlByteArrayOutputStream out) {
493:
494: int strlen = str.length();
495: int c, count = 0;
496:
497: for (int i = 0; i < strlen; i++) {
498: c = str.charAt(i);
499:
500: if (c >= 0x0001 && c <= 0x007F) {
501: out.write(c);
502:
503: count++;
504: } else if (c > 0x07FF) {
505: out.write(0xE0 | ((c >> 12) & 0x0F));
506: out.write(0x80 | ((c >> 6) & 0x3F));
507: out.write(0x80 | ((c >> 0) & 0x3F));
508:
509: count += 3;
510: } else {
511: out.write(0xC0 | ((c >> 6) & 0x1F));
512: out.write(0x80 | ((c >> 0) & 0x3F));
513:
514: count += 2;
515: }
516: }
517:
518: return count;
519: }
520:
521: public static int getUTFSize(String s) {
522:
523: int len = (s == null) ? 0 : s.length();
524: int l = 0;
525:
526: for (int i = 0; i < len; i++) {
527: int c = s.charAt(i);
528:
529: if ((c >= 0x0001) && (c <= 0x007F)) {
530: l++;
531: } else if (c > 0x07FF) {
532: l += 3;
533: } else {
534: l += 2;
535: }
536: }
537:
538: return l;
539: }
540:
541: /**
542: * Using a Reader and a Writer, returns a String from an InputStream.
543: */
544: public static String inputStreamToString(InputStream x, int length)
545: throws IOException {
546:
547: InputStreamReader in = new InputStreamReader(x);
548: StringWriter writer = new StringWriter();
549: int blocksize = 8 * 1024;
550: char[] buffer = new char[blocksize];
551:
552: for (int left = length; left > 0;) {
553: int read = in.read(buffer, 0, left > blocksize ? blocksize
554: : left);
555:
556: if (read == -1) {
557: break;
558: }
559:
560: writer.write(buffer, 0, read);
561:
562: left -= read;
563: }
564:
565: writer.close();
566:
567: return writer.toString();
568: }
569:
570: // fredt@users 20020130 - patch 497872 by Nitin Chauhan - use byte[] of exact size
571:
572: /**
573: * Returns the quoted version of the string using the quotechar argument.
574: * doublequote argument indicates whether each instance of quotechar
575: * inside the string is doubled.<p>
576: *
577: * null string argument returns null. If the caller needs the literal
578: * "NULL" it should created it itself <p>
579: *
580: * The reverse conversion is handled in Tokenizer.java
581: */
582: public static String toQuotedString(String s, char quoteChar,
583: boolean extraQuote) {
584:
585: if (s == null) {
586: return null;
587: }
588:
589: int count = extraQuote ? count(s, quoteChar) : 0;
590: int len = s.length();
591: char[] b = new char[2 + count + len];
592: int i = 0;
593: int j = 0;
594:
595: b[j++] = quoteChar;
596:
597: for (; i < len; i++) {
598: char c = s.charAt(i);
599:
600: b[j++] = c;
601:
602: if (extraQuote && c == quoteChar) {
603: b[j++] = c;
604: }
605: }
606:
607: b[j] = quoteChar;
608:
609: return new String(b);
610: }
611:
612: /**
613: * Counts Character c in String s
614: *
615: * @param String s
616: *
617: * @return int count
618: */
619: static int count(final String s, final char c) {
620:
621: int pos = 0;
622: int count = 0;
623:
624: if (s != null) {
625: while ((pos = s.indexOf(c, pos)) > -1) {
626: count++;
627: pos++;
628: }
629: }
630:
631: return count;
632: }
633: }
|