001: /*
002: * $Header$
003: * $Revision: 4672 $
004: * $Date: 2006-09-27 00:03:16 +0000 (Wed, 27 Sep 2006) $
005: *
006: * ====================================================================
007: *
008: * Copyright 1999-2004 The Apache Software Foundation
009: *
010: * Licensed under the Apache License, Version 2.0 (the "License");
011: * you may not use this file except in compliance with the License.
012: * You may obtain a copy of the License at
013: *
014: * http://www.apache.org/licenses/LICENSE-2.0
015: *
016: * Unless required by applicable law or agreed to in writing, software
017: * distributed under the License is distributed on an "AS IS" BASIS,
018: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019: * See the License for the specific language governing permissions and
020: * limitations under the License.
021: * ====================================================================
022: *
023: * This software consists of voluntary contributions made by many
024: * individuals on behalf of the Apache Software Foundation. For more
025: * information on the Apache Software Foundation, please see
026: * <http://www.apache.org/>.
027: *
028: */
029:
030: package org.apache.commons.httpclient;
031:
032: import java.io.IOException;
033: import java.io.InputStream;
034: import java.io.ByteArrayOutputStream;
035: import java.util.ArrayList;
036:
037: import org.apache.commons.httpclient.util.EncodingUtil;
038: import org.apache.commons.logging.Log;
039: import org.apache.commons.logging.LogFactory;
040:
041: /**
042: * A utility class for parsing http header values according to
043: * RFC-2616 Section 4 and 19.3.
044: *
045: * @author Michael Becke
046: * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
047: *
048: * @since 2.0beta1
049: */
050: @SuppressWarnings("unchecked")
051: public class HttpParser {
052:
053: /** Log object for this class. */
054: private static final Log LOG = LogFactory.getLog(HttpParser.class);
055:
056: /**
057: * Constructor for HttpParser.
058: */
059: private HttpParser() {
060: }
061:
062: /**
063: * Return byte array from an (unchunked) input stream.
064: * Stop reading when <tt>"\n"</tt> terminator encountered
065: * If the stream ends before the line terminator is found,
066: * the last part of the string will still be returned.
067: * If no input data available, <code>null</code> is returned.
068: *
069: * @param inputStream the stream to read from
070: *
071: * @throws IOException if an I/O problem occurs
072: * @return a byte array from the stream
073: */
074: public static byte[] readRawLine(InputStream inputStream)
075: throws IOException {
076: LOG.trace("enter HttpParser.readRawLine()");
077:
078: ByteArrayOutputStream buf = new ByteArrayOutputStream();
079: int ch;
080: while ((ch = inputStream.read()) >= 0) {
081: buf.write(ch);
082: if (ch == '\n') { // be tolerant (RFC-2616 Section 19.3)
083: break;
084: }
085: }
086: if (buf.size() == 0) {
087: return null;
088: }
089: return buf.toByteArray();
090: }
091:
092: /**
093: * Read up to <tt>"\n"</tt> from an (unchunked) input stream.
094: * If the stream ends before the line terminator is found,
095: * the last part of the string will still be returned.
096: * If no input data available, <code>null</code> is returned.
097: *
098: * @param inputStream the stream to read from
099: * @param charset charset of HTTP protocol elements
100: *
101: * @throws IOException if an I/O problem occurs
102: * @return a line from the stream
103: *
104: * @since 3.0
105: */
106: public static String readLine(InputStream inputStream,
107: String charset) throws IOException {
108: LOG.trace("enter HttpParser.readLine(InputStream, String)");
109: byte[] rawdata = readRawLine(inputStream);
110: if (rawdata == null) {
111: return null;
112: }
113: // strip CR and LF from the end
114: int len = rawdata.length;
115: int offset = 0;
116: if (len > 0) {
117: if (rawdata[len - 1] == '\n') {
118: offset++;
119: if (len > 1) {
120: if (rawdata[len - 2] == '\r') {
121: offset++;
122: }
123: }
124: }
125: }
126: return EncodingUtil
127: .getString(rawdata, 0, len - offset, charset);
128: }
129:
130: /**
131: * Read up to <tt>"\n"</tt> from an (unchunked) input stream.
132: * If the stream ends before the line terminator is found,
133: * the last part of the string will still be returned.
134: * If no input data available, <code>null</code> is returned
135: *
136: * @param inputStream the stream to read from
137: *
138: * @throws IOException if an I/O problem occurs
139: * @return a line from the stream
140: *
141: * @deprecated use #readLine(InputStream, String)
142: */
143:
144: public static String readLine(InputStream inputStream)
145: throws IOException {
146: LOG.trace("enter HttpParser.readLine(InputStream)");
147: return readLine(inputStream, "US-ASCII");
148: }
149:
150: /**
151: * Parses headers from the given stream. Headers with the same name are not
152: * combined.
153: *
154: * @param is the stream to read headers from
155: * @param charset the charset to use for reading the data
156: *
157: * @return an array of headers in the order in which they were parsed
158: *
159: * @throws IOException if an IO error occurs while reading from the stream
160: * @throws HttpException if there is an error parsing a header value
161: *
162: * @since 3.0
163: */
164: public static Header[] parseHeaders(InputStream is, String charset)
165: throws IOException, HttpException {
166: LOG
167: .trace("enter HeaderParser.parseHeaders(InputStream, String)");
168:
169: ArrayList headers = new ArrayList();
170: String name = null;
171: StringBuffer value = null;
172: for (;;) {
173: String line = HttpParser.readLine(is, charset);
174: if ((line == null) || (line.trim().length() < 1)) {
175: break;
176: }
177:
178: // Parse the header name and value
179: // Check for folded headers first
180: // Detect LWS-char see HTTP/1.0 or HTTP/1.1 Section 2.2
181: // discussion on folded headers
182: if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t')) {
183: // we have continuation folded header
184: // so append value
185: if (value != null) {
186: value.append(' ');
187: value.append(line.trim());
188: }
189: } else {
190: // make sure we save the previous name,value pair if present
191: if (name != null) {
192: headers.add(new Header(name, value.toString()));
193: }
194:
195: // Otherwise we should have normal HTTP header line
196: // Parse the header name and value
197: int colon = line.indexOf(":");
198:
199: // START HERITRIX Change
200: // Don't throw an exception if can't parse. We want to keep
201: // going even though header is bad. Rather, create
202: // pseudo-header.
203: if (colon < 0) {
204: // throw new ProtocolException("Unable to parse header: " +
205: // line);
206: name = "HttpClient-Bad-Header-Line-Failed-Parse";
207: value = new StringBuffer(line);
208:
209: } else {
210: name = line.substring(0, colon).trim();
211: value = new StringBuffer(line.substring(colon + 1)
212: .trim());
213: }
214: // END HERITRIX change.
215:
216: }
217:
218: }
219:
220: // make sure we save the last name,value pair if present
221: if (name != null) {
222: headers.add(new Header(name, value.toString()));
223: }
224:
225: return (Header[]) headers.toArray(new Header[headers.size()]);
226: }
227:
228: /**
229: * Parses headers from the given stream. Headers with the same name are not
230: * combined.
231: *
232: * @param is the stream to read headers from
233: *
234: * @return an array of headers in the order in which they were parsed
235: *
236: * @throws IOException if an IO error occurs while reading from the stream
237: * @throws HttpException if there is an error parsing a header value
238: *
239: * @deprecated use #parseHeaders(InputStream, String)
240: */
241: public static Header[] parseHeaders(InputStream is)
242: throws IOException, HttpException {
243: LOG
244: .trace("enter HeaderParser.parseHeaders(InputStream, String)");
245: return parseHeaders(is, "US-ASCII");
246: }
247: }
|