001: /*
002: * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * 1. Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: * 2. Redistributions in binary form must reproduce the above copyright notice,
010: * this list of conditions and the following disclaimer in the documentation
011: * and/or other materials provided with the distribution.
012: * 3. The end-user documentation included with the redistribution, if any, must
013: * include the following acknowledgment:
014: *
015: * "This product includes software developed by Gargoyle Software Inc.
016: * (http://www.GargoyleSoftware.com/)."
017: *
018: * Alternately, this acknowledgment may appear in the software itself, if
019: * and wherever such third-party acknowledgments normally appear.
020: * 4. The name "Gargoyle Software" must not be used to endorse or promote
021: * products derived from this software without prior written permission.
022: * For written permission, please contact info@GargoyleSoftware.com.
023: * 5. Products derived from this software may not be called "HtmlUnit", nor may
024: * "HtmlUnit" appear in their name, without prior written permission of
025: * Gargoyle Software Inc.
026: *
027: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
028: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
029: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
030: * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
031: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
032: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
033: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
034: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
035: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
036: * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
037: */
038: package com.gargoylesoftware.htmlunit;
039:
040: import java.io.ByteArrayInputStream;
041: import java.io.IOException;
042: import java.io.InputStream;
043: import java.io.Serializable;
044: import java.io.UnsupportedEncodingException;
045: import java.net.URL;
046: import java.util.Iterator;
047: import java.util.List;
048:
049: import org.apache.commons.httpclient.NameValuePair;
050: import org.apache.commons.lang.ArrayUtils;
051: import org.apache.commons.lang.StringUtils;
052: import org.apache.commons.logging.Log;
053: import org.apache.commons.logging.LogFactory;
054:
055: /**
056: * Simple base class for {@link WebResponse}.
057: *
058: * @version $Revision: 2132 $
059: * @author Brad Clarke
060: * @author Ahmed Ashour
061: */
062: public class WebResponseImpl implements WebResponse, Serializable {
063:
064: private static final long serialVersionUID = 2842434739251092348L;
065:
066: private final transient Log log_ = LogFactory
067: .getLog(WebResponseImpl.class);
068: private URL url_;
069: private SubmitMethod requestMethod_;
070: private long loadTime_;
071: private WebResponseData responseData_;
072: private String charset_;
073:
074: /**
075: * Construct with all data
076: *
077: * @param responseData Data that was send back
078: * @param url Where this response came from
079: * @param requestMethod The method used to get this response
080: * @param loadTime How long the response took to be sent
081: */
082: public WebResponseImpl(final WebResponseData responseData,
083: final URL url, final SubmitMethod requestMethod,
084: final long loadTime) {
085: this (responseData, TextUtil.DEFAULT_CHARSET, url,
086: requestMethod, loadTime);
087: }
088:
089: /**
090: * Construct with all data
091: *
092: * @param responseData Data that was send back
093: * @param charset Charset used if not returned in the response.
094: * @param url Where this response came from
095: * @param requestMethod The method used to get this response
096: * @param loadTime How long the response took to be sent
097: */
098: public WebResponseImpl(final WebResponseData responseData,
099: final String charset, final URL url,
100: final SubmitMethod requestMethod, final long loadTime) {
101: responseData_ = responseData;
102: charset_ = charset;
103: url_ = url;
104: requestMethod_ = requestMethod;
105: loadTime_ = loadTime;
106: }
107:
108: /**
109: * {@inheritDoc}
110: */
111: public int getStatusCode() {
112: return responseData_.getStatusCode();
113: }
114:
115: /**
116: * {@inheritDoc}
117: */
118: public String getStatusMessage() {
119: return responseData_.getStatusMessage();
120: }
121:
122: /**
123: * {@inheritDoc}
124: */
125: public String getContentType() {
126: final String contentTypeHeader = getResponseHeaderValue("content-type");
127: if (contentTypeHeader == null) {
128: // Not technically legal but some servers don't return a content-type
129: return "";
130: }
131: final int index = contentTypeHeader.indexOf(';');
132: if (index == -1) {
133: return contentTypeHeader;
134: }
135: return contentTypeHeader.substring(0, index);
136: }
137:
138: /**
139: * {@inheritDoc}
140: */
141: public String getContentAsString() {
142: try {
143: final byte[] body = responseData_.getBody();
144: if (body != null) {
145: return new String(body, getContentCharSet());
146: } else {
147: return null;
148: }
149: } catch (final UnsupportedEncodingException e) {
150: return null;
151: }
152: }
153:
154: /**
155: * {@inheritDoc}
156: */
157: public InputStream getContentAsStream() throws IOException {
158: final byte[] body = responseData_.getBody();
159: if (body != null) {
160: return new ByteArrayInputStream(body);
161: } else {
162: return null;
163: }
164: }
165:
166: /**
167: * {@inheritDoc}
168: */
169: public URL getUrl() {
170: return url_;
171: }
172:
173: /**
174: * {@inheritDoc}
175: */
176: public SubmitMethod getRequestMethod() {
177: return requestMethod_;
178: }
179:
180: /**
181: * {@inheritDoc}
182: */
183: public List getResponseHeaders() {
184: return responseData_.getResponseHeaders();
185: }
186:
187: /**
188: * {@inheritDoc}
189: */
190: public String getResponseHeaderValue(final String headerName) {
191: final Iterator iterator = responseData_.getResponseHeaders()
192: .iterator();
193: while (iterator.hasNext()) {
194: final NameValuePair pair = (NameValuePair) iterator.next();
195: if (pair.getName().equalsIgnoreCase(headerName)) {
196: return pair.getValue();
197: }
198: }
199: return null;
200: }
201:
202: /**
203: * {@inheritDoc}
204: */
205: public long getLoadTimeInMilliSeconds() {
206: return loadTime_;
207: }
208:
209: /**
210: * {@inheritDoc}
211: * If no charset is specified in headers, then try to guess it from the content.
212: * Currently only UTF-8 with BOM marker is detected this way.
213: * @see <a href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Wikipedia - Byte Order Mark</a>
214: * @return the charset, {@link TextUtil#DEFAULT_CHARSET} if it can't be determined
215: */
216: public String getContentCharSet() {
217: final String contentTypeHeader = getResponseHeaderValue("content-type");
218: String charset = StringUtils.substringAfter(contentTypeHeader,
219: "charset=");
220: if (StringUtils.isEmpty(charset)) {
221: log_
222: .debug("No charset specified in header, trying to guess it from content");
223: final byte[] body = responseData_.getBody();
224: final byte[] markerUTF8 = { (byte) 0xef, (byte) 0xbb,
225: (byte) 0xbf };
226: final byte[] markerUTF16BE = { (byte) 0xfe, (byte) 0xff };
227: final byte[] markerUTF16LE = { (byte) 0xff, (byte) 0xfe };
228: if (body != null
229: && ArrayUtils.isEquals(markerUTF8, ArrayUtils
230: .subarray(body, 0, 3))) {
231: log_.debug("UTF-8 marker found");
232: charset = "UTF-8";
233: } else if (body != null
234: && ArrayUtils.isEquals(markerUTF16BE, ArrayUtils
235: .subarray(body, 0, 2))) {
236: log_.debug("UTF-16BE marker found");
237: charset = "UTF-16BE";
238: } else if (body != null
239: && ArrayUtils.isEquals(markerUTF16LE, ArrayUtils
240: .subarray(body, 0, 2))) {
241: log_.debug("UTF-16LE marker found");
242: charset = "UTF-16LE";
243: } else {
244: log_.debug("No charset guessed, using " + charset_);
245: charset = charset_;
246: }
247: } else if (charset.charAt(0) == '"'
248: && charset.charAt(charset.length() - 1) == '"'
249: || charset.charAt(0) == '\''
250: && charset.charAt(charset.length() - 1) == '\'') {
251: charset = charset.substring(1, charset.length() - 1);
252: }
253: return charset;
254: }
255:
256: /**
257: * {@inheritDoc}
258: */
259: public byte[] getResponseBody() {
260: return responseData_.getBody();
261: }
262:
263: }
|