001: // Copyright 02/17/00 Sun Microsystems, Inc. All Rights Reserved.
002: // "@(#)UnicodeInputStreamReader.java 1.2 00/02/17 Sun Microsystems"
003:
004: package com.sun.portal.util;
005:
006: import java.io.*;
007:
/**
 * The <code>UnicodeInputStreamReader</code> class converts from a byte
 * stream that contains Java Unicode encoded characters (\\uXXXX) to
 * Unicode characters. It can be used to read files that have been
 * produced using the native2ascii tool.
 *
 * <p>The bytes are first interpreted as ISO-8859-1, so every byte maps to
 * exactly one character, and the following conversions are then applied:
 * <ul>
 * <li>\\uXXXX (one backslash, a <code>u</code> and four hex digits) becomes
 *     the single character whose value is given by the four hex digits;</li>
 * <li>a backslash followed by any other character X becomes X, so two
 *     consecutive backslashes become one backslash;</li>
 * <li>every other character is passed through unchanged.</li>
 * </ul>
 *
 * <p>An escape sequence that is cut short by the end of the input is
 * discarded silently. Marking is not supported.
 */
public class UnicodeInputStreamReader extends FilterReader {

    /**
     * Creates a Unicode input stream reader that reads from the given stream.
     *
     * @param is the InputStream from which to read
     * @throws UnsupportedEncodingException if the ISO-8859-1 charset is not
     *         available; kept in the signature for source compatibility
     */
    public UnicodeInputStreamReader(InputStream is)
            throws UnsupportedEncodingException {
        // ISO-8859-1 maps each byte to the character with the same value, so
        // the escape decoding below sees the file's bytes unmodified.
        super(new InputStreamReader(is, "ISO-8859-1"));
    }

    /**
     * Tells whether this reader supports the mark operation, which it does not.
     *
     * @return always <code>false</code>
     */
    public boolean markSupported() {
        return false;
    }

    /**
     * Reads one decoded character from the stream. See the class description
     * for the conversions that are performed.
     *
     * @return the character that was read, or -1 on end of input
     * @throws IOException if the underlying stream cannot be read
     * @throws IllegalArgumentException if a \\uXXXX escape contains a
     *         character that is not a hex digit
     */
    public int read() throws IOException {
        return readDecoded();
    }

    /**
     * Reads up to len decoded characters from the stream and puts them in
     * cbuf starting at offset off. Reading continues until len characters
     * have been stored or the end of the input is reached.
     *
     * @param cbuf the array of characters that is filled in
     * @param off the offset at which to start placing characters
     * @param len the maximum number of characters to read
     *
     * @return the number of characters read, or -1 if the end of the input
     *         was reached before any character could be read
     * @throws IOException if the underlying stream cannot be read
     * @throws IndexOutOfBoundsException if off or len is negative, or if
     *         len is greater than <code>cbuf.length - off</code>
     * @throws IllegalArgumentException if a \\uXXXX escape contains a
     *         character that is not a hex digit
     */
    public int read(char[] cbuf, int off, int len) throws IOException {
        // Validate before touching the stream so that a bad call neither
        // consumes input nor returns a meaningless negative count.
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
        }
        for (int count = 0; count < len; count++) {
            int decoded = readDecoded();
            if (decoded == -1) {
                return (count > 0) ? count : -1;
            }
            cbuf[off + count] = (char) decoded;
        }
        return len;
    }

    /**
     * Skips up to n decoded characters. The characters are read and
     * discarded through the decoder, so an escape sequence counts as one
     * character and is never split in the middle.
     *
     * @param n the number of decoded characters to skip
     * @return the number of characters actually skipped, which is less than
     *         n only if the end of the input was reached
     * @throws IllegalArgumentException if n is negative, or if a \\uXXXX
     *         escape contains a character that is not a hex digit
     * @throws IOException if the underlying stream cannot be read
     */
    public long skip(long n) throws IOException {
        if (n < 0L) {
            throw new IllegalArgumentException("skip value is negative");
        }
        long skipped = 0L;
        while (skipped < n && readDecoded() != -1) {
            skipped++;
        }
        return skipped;
    }

    /**
     * Reads and decodes the next character from the wrapped reader.
     *
     * @return the decoded character (0 to 0xFFFF), or -1 if the end of the
     *         input was reached, including in the middle of an escape
     */
    private int readDecoded() throws IOException {
        int c = in.read();
        if (c != '\\') {
            // Ordinary character, or -1 at end of input.
            return c;
        }
        c = in.read();
        if (c != 'u') {
            // Backslash followed by X yields X; -1 if the input ended
            // right after the backslash.
            return c;
        }
        // Accumulate the four hex digits, most significant first.
        int value = 0;
        for (int digits = 0; digits < 4; digits++) {
            c = in.read();
            if (c == -1) {
                return -1;
            }
            value = (value << 4) + hexValue((char) c);
        }
        return value;
    }

    /**
     * Converts one ASCII hex digit (0-9, a-f, A-F) to its numeric value.
     *
     * @throws IllegalArgumentException if digit is not a hex digit
     */
    private static int hexValue(char digit) {
        if (digit >= '0' && digit <= '9') {
            return digit - '0';
        }
        if (digit >= 'a' && digit <= 'f') {
            return 10 + digit - 'a';
        }
        if (digit >= 'A' && digit <= 'F') {
            return 10 + digit - 'A';
        }
        throw new IllegalArgumentException(
                "Malformed \\uxxxx encoding.");
    }
}
|