001: /*
002: * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/ChunkedInputStream.java,v 1.24 2004/10/10 15:18:55 olegk Exp $
003: * $Revision: 480424 $
004: * $Date: 2006-11-29 06:56:49 +0100 (Wed, 29 Nov 2006) $
005: *
006: * ====================================================================
007: *
008: * Licensed to the Apache Software Foundation (ASF) under one or more
009: * contributor license agreements. See the NOTICE file distributed with
010: * this work for additional information regarding copyright ownership.
011: * The ASF licenses this file to You under the Apache License, Version 2.0
012: * (the "License"); you may not use this file except in compliance with
013: * the License. You may obtain a copy of the License at
014: *
015: * http://www.apache.org/licenses/LICENSE-2.0
016: *
017: * Unless required by applicable law or agreed to in writing, software
018: * distributed under the License is distributed on an "AS IS" BASIS,
019: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
020: * See the License for the specific language governing permissions and
021: * limitations under the License.
022: * ====================================================================
023: *
024: * This software consists of voluntary contributions made by many
025: * individuals on behalf of the Apache Software Foundation. For more
026: * information on the Apache Software Foundation, please see
027: * <http://www.apache.org/>.
028: *
029: */
030:
031: package org.apache.commons.httpclient;
032:
033: import java.io.ByteArrayOutputStream;
034: import java.io.IOException;
035: import java.io.InputStream;
036:
037: import org.apache.commons.httpclient.util.EncodingUtil;
038: import org.apache.commons.httpclient.util.ExceptionUtil;
039: import org.apache.commons.logging.Log;
040: import org.apache.commons.logging.LogFactory;
041:
042: /**
043: * <p>Transparently coalesces chunks of a HTTP stream that uses
044: * Transfer-Encoding chunked.</p>
045: *
046: * <p>Note that this class NEVER closes the underlying stream, even when close
047: * gets called. Instead, it will read until the "end" of its chunking on close,
048: * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
049: * not requiring the client to remember to read the entire contents of the
050: * response.</p>
051: *
052: * @author Ortwin Glueck
053: * @author Sean C. Sullivan
054: * @author Martin Elwin
055: * @author Eric Johnson
056: * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
057: * @author Michael Becke
058: * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
059: *
060: * @since 2.0
061: *
062: */
063: public class ChunkedInputStream extends InputStream {
064: /** The inputstream that we're wrapping */
065: private InputStream in;
066:
067: /** The chunk size */
068: private int chunkSize;
069:
070: /** The current position within the current chunk */
071: private int pos;
072:
073: /** True if we'are at the beginning of stream */
074: private boolean bof = true;
075:
076: /** True if we've reached the end of stream */
077: private boolean eof = false;
078:
079: /** True if this stream is closed */
080: private boolean closed = false;
081:
082: /** The method that this stream came from */
083: private HttpMethod method = null;
084:
085: /** Log object for this class. */
086: private static final Log LOG = LogFactory
087: .getLog(ChunkedInputStream.class);
088:
089: /**
090: * ChunkedInputStream constructor that associates the chunked input stream with a
091: * {@link HttpMethod HTTP method}. Usually it should be the same {@link HttpMethod
092: * HTTP method} the chunked input stream originates from. If chunked input stream
093: * contains any footers (trailing headers), they will be added to the associated
094: * {@link HttpMethod HTTP method}.
095: *
096: * @param in the raw input stream
097: * @param method the HTTP method to associate this input stream with. Can be <tt>null</tt>.
098: *
099: * @throws IOException If an IO error occurs
100: */
101: public ChunkedInputStream(final InputStream in,
102: final HttpMethod method) throws IOException {
103:
104: if (in == null) {
105: throw new IllegalArgumentException(
106: "InputStream parameter may not be null");
107: }
108: this .in = in;
109: this .method = method;
110: this .pos = 0;
111: }
112:
113: /**
114: * ChunkedInputStream constructor
115: *
116: * @param in the raw input stream
117: *
118: * @throws IOException If an IO error occurs
119: */
120: public ChunkedInputStream(final InputStream in) throws IOException {
121: this (in, null);
122: }
123:
124: /**
125: * <p> Returns all the data in a chunked stream in coalesced form. A chunk
126: * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
127: * is detected.</p>
128: *
129: * <p> Trailer headers are read automcatically at the end of the stream and
130: * can be obtained with the getResponseFooters() method.</p>
131: *
132: * @return -1 of the end of the stream has been reached or the next data
133: * byte
134: * @throws IOException If an IO problem occurs
135: *
136: * @see HttpMethod#getResponseFooters()
137: */
138: public int read() throws IOException {
139:
140: if (closed) {
141: throw new IOException("Attempted read from closed stream.");
142: }
143: if (eof) {
144: return -1;
145: }
146: if (pos >= chunkSize) {
147: nextChunk();
148: if (eof) {
149: return -1;
150: }
151: }
152: pos++;
153: return in.read();
154: }
155:
156: /**
157: * Read some bytes from the stream.
158: * @param b The byte array that will hold the contents from the stream.
159: * @param off The offset into the byte array at which bytes will start to be
160: * placed.
161: * @param len the maximum number of bytes that can be returned.
162: * @return The number of bytes returned or -1 if the end of stream has been
163: * reached.
164: * @see java.io.InputStream#read(byte[], int, int)
165: * @throws IOException if an IO problem occurs.
166: */
167: public int read(byte[] b, int off, int len) throws IOException {
168:
169: if (closed) {
170: throw new IOException("Attempted read from closed stream.");
171: }
172:
173: if (eof) {
174: return -1;
175: }
176: if (pos >= chunkSize) {
177: nextChunk();
178: if (eof) {
179: return -1;
180: }
181: }
182: len = Math.min(len, chunkSize - pos);
183: int count = in.read(b, off, len);
184: pos += count;
185: return count;
186: }
187:
188: /**
189: * Read some bytes from the stream.
190: * @param b The byte array that will hold the contents from the stream.
191: * @return The number of bytes returned or -1 if the end of stream has been
192: * reached.
193: * @see java.io.InputStream#read(byte[])
194: * @throws IOException if an IO problem occurs.
195: */
196: public int read(byte[] b) throws IOException {
197: return read(b, 0, b.length);
198: }
199:
200: /**
201: * Read the CRLF terminator.
202: * @throws IOException If an IO error occurs.
203: */
204: private void readCRLF() throws IOException {
205: int cr = in.read();
206: int lf = in.read();
207: if ((cr != '\r') || (lf != '\n')) {
208: throw new IOException("CRLF expected at end of chunk: "
209: + cr + "/" + lf);
210: }
211: }
212:
213: /**
214: * Read the next chunk.
215: * @throws IOException If an IO error occurs.
216: */
217: private void nextChunk() throws IOException {
218: if (!bof) {
219: readCRLF();
220: }
221: chunkSize = getChunkSizeFromInputStream(in);
222: bof = false;
223: pos = 0;
224: if (chunkSize == 0) {
225: eof = true;
226: parseTrailerHeaders();
227: }
228: }
229:
230: /**
231: * Expects the stream to start with a chunksize in hex with optional
232: * comments after a semicolon. The line must end with a CRLF: "a3; some
233: * comment\r\n" Positions the stream at the start of the next line.
234: *
235: * @param in The new input stream.
236: * @param required <tt>true<tt/> if a valid chunk must be present,
237: * <tt>false<tt/> otherwise.
238: *
239: * @return the chunk size as integer
240: *
241: * @throws IOException when the chunk size could not be parsed
242: */
243: private static int getChunkSizeFromInputStream(final InputStream in)
244: throws IOException {
245:
246: ByteArrayOutputStream baos = new ByteArrayOutputStream();
247: // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
248: int state = 0;
249: while (state != -1) {
250: int b = in.read();
251: if (b == -1) {
252: throw new IOException(
253: "chunked stream ended unexpectedly");
254: }
255: switch (state) {
256: case 0:
257: switch (b) {
258: case '\r':
259: state = 1;
260: break;
261: case '\"':
262: state = 2;
263: /* fall through */
264: default:
265: baos.write(b);
266: }
267: break;
268:
269: case 1:
270: if (b == '\n') {
271: state = -1;
272: } else {
273: // this was not CRLF
274: throw new IOException(
275: "Protocol violation: Unexpected"
276: + " single newline character in chunk size");
277: }
278: break;
279:
280: case 2:
281: switch (b) {
282: case '\\':
283: b = in.read();
284: baos.write(b);
285: break;
286: case '\"':
287: state = 0;
288: /* fall through */
289: default:
290: baos.write(b);
291: }
292: break;
293: default:
294: throw new RuntimeException("assertion failed");
295: }
296: }
297:
298: //parse data
299: String dataString = EncodingUtil.getAsciiString(baos
300: .toByteArray());
301: int separator = dataString.indexOf(';');
302: dataString = (separator > 0) ? dataString.substring(0,
303: separator).trim() : dataString.trim();
304:
305: int result;
306: try {
307: result = Integer.parseInt(dataString.trim(), 16);
308: } catch (NumberFormatException e) {
309: throw new IOException("Bad chunk size: " + dataString);
310: }
311: return result;
312: }
313:
314: /**
315: * Reads and stores the Trailer headers.
316: * @throws IOException If an IO problem occurs
317: */
318: private void parseTrailerHeaders() throws IOException {
319: Header[] footers = null;
320: try {
321: String charset = "US-ASCII";
322: if (this .method != null) {
323: charset = this .method.getParams()
324: .getHttpElementCharset();
325: }
326: footers = HttpParser.parseHeaders(in, charset);
327: } catch (HttpException e) {
328: LOG.error("Error parsing trailer headers", e);
329: IOException ioe = new IOException(e.getMessage());
330: ExceptionUtil.initCause(ioe, e);
331: throw ioe;
332: }
333: if (this .method != null) {
334: for (int i = 0; i < footers.length; i++) {
335: this .method.addResponseFooter(footers[i]);
336: }
337: }
338: }
339:
340: /**
341: * Upon close, this reads the remainder of the chunked message,
342: * leaving the underlying socket at a position to start reading the
343: * next response without scanning.
344: * @throws IOException If an IO problem occurs.
345: */
346: public void close() throws IOException {
347: if (!closed) {
348: try {
349: if (!eof) {
350: exhaustInputStream(this );
351: }
352: } finally {
353: eof = true;
354: closed = true;
355: }
356: }
357: }
358:
359: /**
360: * Exhaust an input stream, reading until EOF has been encountered.
361: *
362: * <p>Note that this function is intended as a non-public utility.
363: * This is a little weird, but it seemed silly to make a utility
364: * class for this one function, so instead it is just static and
365: * shared that way.</p>
366: *
367: * @param inStream The {@link InputStream} to exhaust.
368: * @throws IOException If an IO problem occurs
369: */
370: static void exhaustInputStream(InputStream inStream)
371: throws IOException {
372: // read and discard the remainder of the message
373: byte buffer[] = new byte[1024];
374: while (inStream.read(buffer) >= 0) {
375: ;
376: }
377: }
378: }
|