001: /*
002: * Copyright 1999,2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.jasper.xmlparser;
018:
019: import java.io.InputStream;
020: import java.io.IOException;
021: import java.io.Reader;
022:
023: /**
024: * Reader for UCS-2 and UCS-4 encodings.
025: * (i.e., encodings from ISO-10646-UCS-(2|4)).
026: *
027: * @author Neil Graham, IBM
028: *
029: * @version $Id: UCSReader.java,v 1.2 2004/03/17 19:23:05 luehe Exp $
030: */
public class UCSReader extends Reader {

    //
    // Constants
    //

    /**
     * Default byte buffer size (8192, larger than that of ASCIIReader
     * since it's reasonable to surmise that the average UCS-4-encoded
     * file should be 4 times as large as the average ASCII-encoded file).
     */
    public static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Encoding selector: UCS-2, little-endian. */
    public static final short UCS2LE = 1;
    /** Encoding selector: UCS-2, big-endian. */
    public static final short UCS2BE = 2;
    /** Encoding selector: UCS-4, little-endian. */
    public static final short UCS4LE = 4;
    /** Encoding selector: UCS-4, big-endian. */
    public static final short UCS4BE = 8;

    /** Size in bytes of the widest character this reader decodes (UCS-4). */
    private static final int MAX_BYTES_PER_CHAR = 4;

    //
    // Data
    //

    /** Input stream. */
    protected InputStream fInputStream;

    /** Byte buffer. */
    protected byte[] fBuffer;

    /**
     * What kind of data we're dealing with: one of UCS2LE, UCS2BE,
     * UCS4LE or UCS4BE. Any value &gt;= 4 is treated as UCS-4.
     */
    protected short fEncoding;

    //
    // Constructors
    //

    /**
     * Constructs a UCS reader from the specified input stream
     * using the default buffer size. The endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     */
    public UCSReader(InputStream inputStream, short encoding) {
        this(inputStream, DEFAULT_BUFFER_SIZE, encoding);
    } // <init>(InputStream, short)

    /**
     * Constructs a UCS reader from the specified input stream
     * and buffer size. The endian-ness and whether this is
     * UCS-2 or UCS-4 needs also to be known in advance.
     *
     * @param inputStream The input stream.
     * @param size The initial buffer size in bytes. Sizes smaller than
     *             one UCS-4 character (4 bytes) are rounded up so that
     *             the buffer can always hold at least one character.
     * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE.
     *
     * @exception IllegalArgumentException If size is negative
     */
    public UCSReader(InputStream inputStream, int size, short encoding) {
        if (size < 0) {
            throw new IllegalArgumentException("Negative buffer size: " + size);
        }
        fInputStream = inputStream;
        fBuffer = new byte[Math.max(size, MAX_BYTES_PER_CHAR)];
        fEncoding = encoding;
    } // <init>(InputStream,int,short)

    //
    // Reader methods
    //

    /**
     * Read a single character. This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p>For UCS-2 the result is in the range 0 to 65535. For UCS-4 the
     * four input bytes are combined into a single int that is returned
     * as-is, so it may lie outside the range of a <code>char</code>.
     *
     * @return The character read, or -1 if the end of the stream has
     *         been reached (including when the stream ends in the
     *         middle of a character)
     *
     * @exception IOException If an I/O error occurs
     */
    public int read() throws IOException {
        // InputStream.read() yields 0..255 or -1. The end-of-stream test
        // must be made on the raw value: masking first would turn -1 into
        // 0xff and make a genuine 0xff data byte look like end of stream.
        int b0 = fInputStream.read();
        if (b0 == -1) {
            return -1;
        }
        int b1 = fInputStream.read();
        if (b1 == -1) {
            return -1;
        }
        if (fEncoding >= 4) {
            int b2 = fInputStream.read();
            if (b2 == -1) {
                return -1;
            }
            int b3 = fInputStream.read();
            if (b3 == -1) {
                return -1;
            }
            if (fEncoding == UCS4BE) {
                return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
            }
            return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0;
        }
        // UCS-2
        if (fEncoding == UCS2BE) {
            return (b0 << 8) + b1;
        }
        return (b1 << 8) + b0;
    } // read():int

    /**
     * Read characters into a portion of an array. This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     *
     * <p>If the stream ends in the middle of a character, the missing
     * bytes are treated as zero. UCS-4 values are narrowed to
     * <code>char</code>, so values above 0xFFFF are truncated.
     *
     * @param ch     Destination buffer
     * @param offset Offset at which to start storing characters
     * @param length Maximum number of characters to read
     *
     * @return The number of characters read, or -1 if the end of the
     *         stream has been reached
     *
     * @exception IOException If an I/O error occurs
     */
    public int read(char ch[], int offset, int length)
        throws IOException {
        // log2 of the character width in bytes
        int shift = (fEncoding >= 4) ? 2 : 1;
        int bytesPerChar = 1 << shift;

        // Only ever ask for a whole number of characters that fits in the
        // buffer. This keeps the padding below inside the buffer even when
        // the buffer size is not a multiple of the character width, and
        // avoids int overflow in "length << shift" for very large lengths.
        int maxChars = fBuffer.length >> shift;
        int charsWanted = (length < maxChars) ? length : maxChars;
        int count = fInputStream.read(fBuffer, 0, charsWanted << shift);
        if (count == -1) {
            return -1;
        }

        // The stream may have stopped in the middle of a character;
        // fetch the missing bytes one at a time to complete it.
        int remainder = count & (bytesPerChar - 1);
        if (remainder != 0) {
            int numToRead = bytesPerChar - remainder;
            for (int i = 0; i < numToRead; i++) {
                int byteRead = fInputStream.read();
                if (byteRead == -1) {
                    // end of input; something likely went wrong!
                    // Pad the rest of the character with nulls.
                    for (int j = i; j < numToRead; j++) {
                        fBuffer[count + j] = 0;
                    }
                    break;
                }
                fBuffer[count + i] = (byte) byteRead;
            }
            count += numToRead;
        }

        // now count is a multiple of the right number of bytes
        int numChars = count >> shift;
        int curPos = 0;
        for (int i = 0; i < numChars; i++) {
            int b0 = fBuffer[curPos++] & 0xff;
            int b1 = fBuffer[curPos++] & 0xff;
            if (fEncoding >= 4) {
                int b2 = fBuffer[curPos++] & 0xff;
                int b3 = fBuffer[curPos++] & 0xff;
                if (fEncoding == UCS4BE) {
                    ch[offset + i] = (char) ((b0 << 24) + (b1 << 16)
                            + (b2 << 8) + b3);
                } else {
                    ch[offset + i] = (char) ((b3 << 24) + (b2 << 16)
                            + (b1 << 8) + b0);
                }
            } else { // UCS-2
                if (fEncoding == UCS2BE) {
                    ch[offset + i] = (char) ((b0 << 8) + b1);
                } else {
                    ch[offset + i] = (char) ((b1 << 8) + b0);
                }
            }
        }
        return numChars;
    } // read(char[],int,int)

    /**
     * Skip characters. This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p>If the underlying stream stops skipping in the middle of a
     * character, the rest of that character is consumed as well, so that
     * the next read starts on a character boundary; the partially skipped
     * character is included in the returned count.
     *
     * @param n The number of characters to skip
     *
     * @return The number of characters actually skipped
     *
     * @exception IOException If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        // shift is the number of bits to move n leftward to get the number
        // of bytes to skip, and to move the result rightward to get the
        // number of characters effectively skipped.
        int shift = (fEncoding >= 4) ? 2 : 1;
        int mask = (1 << shift) - 1;

        // Clamp the request so that "n << shift" cannot overflow a long.
        long maxChars = Long.MAX_VALUE >> shift;
        long bytesToSkip = ((n > maxChars) ? maxChars : n) << shift;

        long bytesSkipped = fInputStream.skip(bytesToSkip);
        int partial = (int) (bytesSkipped & mask);
        if (partial == 0) {
            return bytesSkipped >> shift;
        }

        // The stream stopped inside a character: swallow its remaining
        // bytes, otherwise every later read would be misaligned.
        for (int i = partial; i <= mask; i++) {
            if (fInputStream.read() == -1) {
                break;
            }
        }
        return (bytesSkipped >> shift) + 1;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return True if the next read() is guaranteed not to block for input,
     * false otherwise. Note that returning false does not guarantee that the
     * next read will block. This implementation always returns false.
     *
     * @exception IOException If an I/O error occurs
     */
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     *
     * @return Whatever the underlying input stream reports
     */
    public boolean markSupported() {
        return fInputStream.markSupported();
    } // markSupported()

    /**
     * Mark the present position in the stream. Subsequent calls to reset()
     * will attempt to reposition the stream to this point. Not all
     * character-input streams support the mark() operation.
     *
     * <p>The limit is handed unchanged to the underlying byte stream.
     *
     * @param readAheadLimit Read-ahead limit passed to the underlying
     *                       input stream's mark() method.
     *
     * @exception IOException If the stream does not support mark(),
     *                        or if some other I/O error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
        fInputStream.mark(readAheadLimit);
    } // mark(int)

    /**
     * Reset the stream by delegating to the underlying input stream's
     * reset() method.
     *
     * @exception IOException If the stream has not been marked,
     *                        or if the mark has been invalidated,
     *                        or if the stream does not support reset(),
     *                        or if some other I/O error occurs
     */
    public void reset() throws IOException {
        fInputStream.reset();
    } // reset()

    /**
     * Close the stream by closing the underlying input stream.
     *
     * @exception IOException If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()

} // class UCSReader
|