0001: /*
0002: * HttpRequest.java
0003: *
0004: * Brazil project web application Framework,
0005: * export version: 1.1
0006: * Copyright (c) 1999-2000 Sun Microsystems, Inc.
0007: *
0008: * Sun Public License Notice
0009: *
0010: * The contents of this file are subject to the Sun Public License Version
0011: * 1.0 (the "License"). You may not use this file except in compliance with
0012: * the License. A copy of the License is included as the file "license.terms",
0013: * and also available at http://www.sun.com/
0014: *
0015: * The Original Code is from:
0016: * Brazil project web application Framework release 1.1.
0017: * The Initial Developer of the Original Code is: cstevens.
0018: * Portions created by cstevens are Copyright (C) Sun Microsystems, Inc.
0019: * All Rights Reserved.
0020: *
0021: * Contributor(s): cstevens, suhler.
0022: *
0023: * Version: 1.12
0024: * Created by cstevens on 99/09/15
0025: * Last modified by cstevens on 00/07/11 11:23:47
0026: */
0027:
0028: package sunlabs.brazil.util.http;
0029:
0030: import sunlabs.brazil.util.SocketFactory;
0031:
0032: import java.io.ByteArrayOutputStream;
0033: import java.io.EOFException;
0034: import java.io.IOException;
0035: import java.io.InputStream;
0036: import java.io.OutputStream;
0037: import java.io.PrintStream;
0038: import java.io.PushbackInputStream;
0039: import java.net.MalformedURLException;
0040: import java.net.URL;
0041: import java.net.UnknownHostException;
0042: import java.util.Vector;
0043:
0044: /**
0045: * Sends an HTTP request to some target host and gets the answer back.
0046: * Similar to the <code>URLConnection</code> class.
0047: * <p>
0048: * Caches connections to hosts, and reuses them if possible. Talks
0049: * HTTP/1.1 to the hosts, in order to keep alive connections as much
0050: * as possible.
0051: * <p>
0052: * The sequence of events for using an <code>HttpRequest</code> is similar
0053: * to how <code>URLConnection</code> is used:
0054: * <ol>
0055: * <li> A new <code>HttpRequest</code> object is constructed.
0056: * <li> The setup parameters are modified:
0057: * <ul>
0058: * <li> {@link #setMethod setMethod}
0059: * <li> {@link #setRequestHeader setRequestHeader}
0060: * <li> {@link #getOutputStream getOutputStream}
0061: * </ul>
0062: * <li> The host (or proxy) is contacted and the HTTP request is issued:
0063: * <ul>
0064: * <li> {@link #connect connect}
0065: * <li> {@link #getInputStream getInputStream}
0066: * </ul>
0067: * <li> The response headers and body are examined:
0068: * <ul>
0069: * <li> {@link #getResponseCode getResponseCode}
0070: * <li> {@link #getResponseHeader getResponseHeader}
0071: * <li> {@link #getContentLength getContentLength}
0072: * </ul>
0073: * <li> The connection is closed:
0074: * <ul>
0075: * <li> {@link #close close}
0076: * </ul>
0077: * </ol>
0078: * <p>
0079: * In the common case, all the setup parameters are initialized to sensible
0080: * values and won't need to be modified. Most users will only need to
0081: * construct a new <code>HttpRequest</code> object and then call
0082: * <code>getInputStream</code> to read the contents. The rest of the
0083: * member variables and methods are only needed for advanced behavior.
0084: * <p>
0085: * The <code>HttpRequest</code> class is intended to be a replacement for the
0086: * <code>URLConnection</code> class. It operates at a lower level and makes
0087: * fewer decisions on behavior. Some differences between the
0088: * <code>HttpRequest</code> class and the <code>URLConnection</code> class
0089: * follow: <ul>
0090: * <li> there are no undocumented global variables (specified in
0091: * <code>System.getProperties</code>) that modify the behavior of
0092: * <code>HttpRequest</code>.
0093: * <li> <code>HttpRequest</code> does not automatically follow redirects.
0094: * <li> <code>HttpRequest</code> does not turn HTTP responses with a status
0095: * code other than "200 OK" into <code>IOExceptions</code>. Sometimes
0096: * it may be necessary and even quite useful to examine the results of
0097: * an "unsuccessful" HTTP request.
0098: * <li> <code>HttpRequest</code> issues HTTP/1.1 requests and handles
0099: * HTTP/0.9, HTTP/1.0, and HTTP/1.1 responses.
0100: * <li> the <code>URLConnection</code> class leaks open sockets if there is
0101: * an error reading the response or if the target does not use
0102: * Keep-Alive, and depends upon the garbage collector to close and
0103: * release the open socket in these cases, which is unreliable because
0104: * it may lead to intermittently running out of sockets if the garbage
0105: * collector doesn't run often enough.
0106: * <li> If the user doesn't read all the data from an
0107: * <code>URLConnection</code>, there are bugs in its implementation
0108: * (as of JDK1.2) that may cause the program to block forever and/or
0109: * read an insufficient amount of data before trying to reuse the
0110: * underlying socket.
0111: * </ul>
0112: * <p>
0113: * A number of the fields in the <code>HttpRequest</code> object are public,
0114: * by design. Most of the methods mentioned above are convenience methods;
0115: * the underlying data fields are meant to be accessed for more complicated
0116: * operations, such as changing the socket factory or accessing the raw HTTP
0117: * response line. Note however, that the order of the methods described
0118: * above is important. For instance, the user cannot examine the response
0119: * headers (by calling <code>getResponseHeader</code> or by examining the
0120: * variable <code>responseHeaders</code>) without first having connected to
0121: * the host.
0122: * <p>
0123: * However, if the user wants to modify the default behavior, the
0124: * <code>HttpRequest</code> uses the value of a number of variables and
0125: * automatically sets some HTTP headers when sending the request. The user
0126: * can change these settings up until the time <code>connect</code> is
0127: * called, as follows: <dl>
0128: * <dt> variable {@link #version}
0129: * <dd> By default, the <code>HttpRequest</code> issues HTTP/1.1
0130: * requests. The user can set <code>version</code> to change this to
0131: * HTTP/1.0.
0132: * <dt> variable {@link #method}
0133: * <dd> If <code>method</code> is <code>null</code> (the default),
0134: * the <code>HttpRequest</code> decides what the HTTP request method
0135: * should be as follows: If the user has called
0136: * <code>getOutputStream</code>, then the method will be "POST",
0137: * otherwise the method will be "GET".
0138: * <dt> variable {@link #proxyHost}
0139: * <dd> If the proxy host is specified, the HTTP request will be
0140: * sent via the specified proxy: <ul>
0141: * <li> <code>connect</code> opens a connection to the proxy.
0142: * <li> uses the "Proxy-Connection" header to keep alive the connection.
0143: * <li> sends a fully qualified URL in the request line, for example
0144: * "http://www.foo.com/index.html". The fully qualified URL
0145: * tells the proxy to forward the request to the specified host.
0146: * </ul>
0147: * Otherwise, the HTTP request will go directly to the host: <ul>
0148: * <li> <code>connect</code> opens a connection to the remote host.
0149: * <li> uses the "Connection" header to keep alive the connection.
0150: * <li> sends a host-relative URL in the request line, for example
0151: * "/index.html". The relative URL is derived from the fully
0152: * qualified URL used to construct this <code>HttpRequest</code>.
0153: * </ul>
0154: * <dt> header "Connection" or "Proxy-Connection"
0155: * <dd> The <code>HttpRequest</code> sets the appropriate connection
0156: * header to "Keep-Alive" to keep alive the connection to the host or
0157: * proxy (respectively). By setting the appropriate connection header,
0158: * the user can control whether the <code>HttpRequest</code> tries to
0159: * use Keep-Alives.
0160: * <dt> header "Host"
0161: * <dd> The HTTP/1.1 protocol requires that the "Host" header be set
0162: * to the name of the machine being contacted. By default, this is
0163: * derived from the URL used to construct the <code>HttpRequest</code>,
0164: * and is set automatically if the user does not set it.
0165: * <dt> header "Content-Length"
0166: * <dd> If the user calls <code>getOutputStream</code> and writes some
0167: * data to it, the "Content-Length" header will be set to the amount of
0168: * data that has been written at the time that <code>connect</code>
0169: * is called.
0170: * </dl>
0171: * <hr>
0172: * Once all data has been read from the remote host, the underlying socket
0173: * may be automatically recycled and used again for subsequent requests to
0174: * the same remote host. If the user is not planning on reading all the data
0175: * from the remote host, the user should call <code>close</code> to release
0176: * the socket. Although it happens under the covers, the user should be
0177: * aware that if an IOException occurs or once data has been read normally
0178: * from the remote host, <code>close</code> is called automatically. This
0179: * is to ensure that the minimal number of sockets are left open at any time.
0180: * <p>
0181: * The input stream that <code>getInputStream</code> provides automatically
0182: * hides whether the remote host is providing HTTP/1.1 "chunked" encoding or
0183: * regular streaming data. The user can simply read until reaching the
0184: * end of the input stream, which signifies that all the available data from
0185: * this request has been read. If reading from a "chunked" source, the
0186: * data is automatically de-chunked as it is presented to the user. Currently,
0187: * no access is provided to the underlying raw input stream.
0188: *
0189: * @author Colin Stevens (colin.stevens@sun.com)
0190: * @version 1.12 00/07/11
0191: */
0192: public class HttpRequest {
0193: /**
0194: * Timeout (in msec) to drain an input stream that has been closed before
0195: * the entire HTTP response has been read.
0196: * <p>
0197: * If the user closes the <code>HttpRequest</code> before reading all of
0198: * the data, but the remote host has agreed to keep this socket alive, we
0199: * need to read and discard the rest of the response before issuing a new
0200: * request. If it takes longer than <code>DRAIN_TIMEOUT</code> to read
0201: * and discard the data, we will just forcefully close the connection to
0202: * the remote host rather than waiting to read any more.
0203: * <p>
0204: * Default value is 10000.
0205: */
0206: public static int DRAIN_TIMEOUT = 10000;
0207:
0208: /**
0209: * Maximum length of a line in the HTTP response headers (sanity check).
0210: * <p>
0211: * If an HTTP response line is longer than this, the response is
0212: * considered to be malformed.
0213: * <p>
0214: * Default value is 1000.
0215: */
0216: public static int LINE_LIMIT = 1000;
0217:
0218: /**
0219: * The default HTTP version string to send to the remote host when
0220: * issuing requests.
0221: * <p>
0222: * The default value can be overridden on a per-request basis by
0223: * setting the <code>version</code> instance variable.
0224: * <p>
0225: * Default value is "HTTP/1.1".
0226: *
0227: * @see #version
0228: */
0229: public static String defaultHTTPVersion = "HTTP/1.1";
0230:
0231: /**
0232: * The default proxy host for HTTP requests. If non-<code>null</code>,
0233: * then all new HTTP requests will be sent via this proxy. If
0234: * <code>null</code>, then all new HTTP requests are sent directly to
0235: * the host specified when the <code>HttpRequest</code> object was
0236: * constructed.
0237: * <p>
0238: * The default value can be overridden on a per-request basis by
0239: * calling the <code>setProxy</code> method or setting the
0240: * <code>proxyHost</code> instance variables.
0241: * <p>
0242: * Default value is <code>null</code>.
0243: *
0244: * @see #defaultProxyPort
0245: * @see #proxyHost
0246: * @see #setProxy
0247: */
0248: public static String defaultProxyHost = null;
0249:
0250: /**
0251: * The default proxy port for HTTP requests.
0252: * <p>
0253: * Default value is <code>80</code>.
0254: *
0255: * @see #defaultProxyHost
0256: * @see #proxyPort
0257: */
0258: public static int defaultProxyPort = 80;
0259:
0260: /**
0261: * The factory for constructing new Socket objects used to connect to
0262: * remote hosts when issuing HTTP requests. The user can set this
0263: * to provide a new type of socket, such as SSL sockets.
0264: * <p>
0265: * Default value is <code>null</code>, which signifies plain sockets.
0266: */
0267: public static SocketFactory socketFactory = null;
0268:
0269: /**
0270: * The cache of idle sockets. Once a request has been handled, the
0271: * now-idle socket can be remembered and reused later if another HTTP
0272: * request is made to the same remote host.
0273: */
0274: public static HttpSocketPool pool = new SimpleHttpSocketPool();
0275:
0276: /**
0277: * The URL used to construct this <code>HttpRequest</code>.
0278: */
0279: public URL url;
0280:
0281: /**
0282: * The host extracted from the URL used to construct this
0283: * <code>HttpRequest</code>.
0284: *
0285: * @see #url
0286: */
0287: public String host;
0288:
0289: /**
0290: * The port extracted from the URL used to construct this
0291: * <code>HttpRequest</code>.
0292: *
0293: * @see #url
0294: */
0295: public int port;
0296:
0297: /**
0298: * If non-<code>null</code>, sends this HTTP request via the specified
0299: * proxy host and port.
0300: * <p>
0301: * Initialized from <code>defaultProxyHost</code>, but may be changed
0302: * by the user at any time up until the HTTP request is actually sent.
0303: *
0304: * @see #defaultProxyHost
0305: * @see #proxyPort
0306: * @see #setProxy
0307: * @see #connect
0308: */
0309: public String proxyHost;
0310:
0311: /**
0312: * The proxy port.
0313: *
0314: * @see #proxyHost
0315: */
0316: public int proxyPort;
0317:
/*
 * Set to true the first time connect() is invoked; while it is true,
 * further calls to connect() return immediately without doing anything.
 */
0318: boolean connected;
/*
 * Soft end-of-data flag consulted by closeSocket(): when it is false and
 * the connection is to be kept alive, the remaining response data is
 * drained in the background before the socket is handed back to the pool.
 * NOTE(review): the code that sets this flag is not in this part of the
 * file -- presumably the response input streams set it; confirm.
 */
0319: boolean eof;
/*
 * The pooled socket currently used by this request.  Assigned by
 * openSocket() and reset to null by closeSocket().
 */
0320: HttpSocket hs;
0321:
0322: /**
0323: * The HTTP method, such as "GET", "POST", or "HEAD".
0324: * <p>
0325: * May be set by the user at any time up until the HTTP request is
0326: * actually sent.
0327: */
0328: public String method;
0329:
0330: /**
0331: * The HTTP version string.
0332: * <p>
0333: * Initialized from <code>defaultHTTPVersion</code>, but may be changed
0334: * by the user at any time up until the HTTP request is actually sent.
0335: */
0336: public String version;
0337:
0338: /**
0339: * The headers for the HTTP request. All of these headers will be sent
0340: * when the connection is actually made.
0341: */
0342: public MimeHeaders requestHeaders;
/*
 * Buffer handed out by getOutputStream() to collect the request body.
 * Stays null until getOutputStream() is called; sendRequest() writes it
 * to the socket and then sets it back to null to release the memory.
 */
0343: ByteArrayOutputStream postData;
0344:
/*
 * The request target placed in the HTTP request line.  Computed by
 * prepareHeaders(): url.getFile() for a direct request, url.toString()
 * when going through a proxy.
 */
0345: String uri;
/*
 * Name of the header used to negotiate keep-alive, chosen by
 * prepareHeaders(): "Connection" for a direct request, "Proxy-Connection"
 * when going through a proxy.  Also used by parseResponse() to look up the
 * matching response header.
 */
0346: String connectionHeader;
0347:
/*
 * The stream returned to the user by getInputStream().  Initially wraps
 * the socket's input stream; parseResponse() replaces it with a stream
 * appropriate to the response framing.
 */
0348: HttpInputStream in;
/*
 * The stream that "in" is layered on.  This is also the stream that
 * closeSocket() hands to the background drainer.
 */
0349: InputStream under;
0350:
0351: /**
0352: * The status line from the HTTP response. This field is not valid until
0353: * after <code>connect</code> has been called and the HTTP response has
0354: * been read.
0355: */
0356: public String status;
0357:
0358: /**
0359: * The headers that were present in the HTTP response. This field is
0360: * not valid until after <code>connect</code> has been called and the
0361: * HTTP response has been read.
0362: */
0363: public MimeHeaders responseHeaders;
0364:
0365: /*
0366: * Cached value of keep-alive from the response headers.
0367: */
0368: boolean keepAlive;
0369:
0370: /**
0371: * An artifact of HTTP/1.1 chunked encoding. At the end of an HTTP/1.1
0372: * chunked response, there may be more MimeHeaders. It is only possible
0373: * to access these MimeHeaders after all the data from the input stream
0374: * returned by <code>getInputStream</code> has been read. At that point,
0375: * this field will automatically be initialized to the set of any headers
0376: * that were found. If not reading from an HTTP/1.1 chunked source, then
0377: * this field is irrelevant and will remain <code>null</code>.
0378: */
0379: public MimeHeaders responseTrailers;
0380:
/**
 * Builds an <code>HttpRequest</code> that will fetch the resource named
 * by the given URL.  The proxy settings and the HTTP version are seeded
 * from the class-wide defaults and may still be changed before the
 * request is sent.
 * <p>
 * Nothing is sent at this point; the host named by the URL is <b>not</b>
 * contacted until the request is actually issued.
 *
 * @param   url
 *          A fully qualified "http:" URL.
 *
 * @throws  IllegalArgumentException
 *          if <code>url</code> is not an "http:" URL.
 */
public HttpRequest(URL url) {
    if (!url.getProtocol().equals("http")) {
        throw new IllegalArgumentException(url.toString());
    }

    int urlPort = url.getPort();

    this.url = url;
    this.host = url.getHost();
    // A negative port means the URL did not name one; use the HTTP default.
    this.port = (urlPort < 0) ? 80 : urlPort;

    this.proxyHost = defaultProxyHost;
    this.proxyPort = defaultProxyPort;
    this.version = defaultHTTPVersion;

    this.requestHeaders = new MimeHeaders();
    this.responseHeaders = new MimeHeaders();
}
0412:
/**
 * Builds an <code>HttpRequest</code> from the string form of a URL.
 * Behaves exactly like the constructor taking a <code>URL</code> once
 * the string has been parsed.
 * <p>
 * The host named by the URL is <b>not</b> contacted at this time.
 *
 * @param   url
 *          A string representing a fully qualified "http:" URL.
 *
 * @throws  IllegalArgumentException
 *          if <code>url</code> is not a well-formed "http:" URL.
 */
public HttpRequest(String url) {
    // Parsing is delegated to a static helper so that the checked
    // MalformedURLException can be translated before this(...) runs.
    this(toURL(url));
}
0428:
0429: /*
0430: * Artifact of Java: cannot implement HttpRequest(String) as follows
0431: * because <code>this(new URL(url))</code> must be first line in
0432: * constructor; it can't be inside of try statement:
0433: *
0434: * public HttpRequest(String url) {
0435: * try {
0436: * this(new URL(url));
0437: * } catch (MalformedURLException e) {
0438: * throw new IllegalArgumentException(url);
0439: * }
0440: * }
0441: */
0442: private static URL toURL(String url) {
0443: try {
0444: return new URL(url);
0445: } catch (MalformedURLException e) {
0446: throw new IllegalArgumentException(url);
0447: }
0448: }
0449:
0450: /**
0451: * Sets the HTTP method to the specified value. Some of the normal
0452: * HTTP methods are "GET", "POST", "HEAD", "PUT", "DELETE", but the
0453: * user can set the method to any value desired.
0454: * <p>
0455: * If this method is called, it must be called before <code>connect</code>
0456: * is called. Otherwise it will have no effect.
0457: *
0458: * @param method
0459: * The string for the HTTP method, or <code>null</code> to
0460: * allow this <code>HttpRequest</code> to pick the method for
0461: * itself.
0462: */
0463: public void setMethod(String method) {
0464: this .method = method;
0465: }
0466:
0467: /**
0468: * Sets the proxy for this request. The HTTP proxy request will be sent
0469: * to the specified proxy host.
0470: * <p>
0471: * If this method is called, it must be called before <code>connect</code>
0472: * is called. Otherwise it will have no effect.
0473: *
0474: * @param proxyHost
0475: * The proxy that will handle the request, or <code>null</code>
0476: * to not use a proxy.
0477: *
0478: * @param proxyPort
0479: * The port on the proxy, for the proxy request. Ignored if
0480: * <code>proxyHost</code> is <code>null</code>.
0481: */
0482: public void setProxy(String proxyHost, int proxyPort) {
0483: this .proxyHost = proxyHost;
0484: this .proxyPort = proxyPort;
0485: }
0486:
0487: /**
0488: * Sets a request header in the HTTP request that will be issued. In
0489: * order to do fancier things like appending a value to an existing
0490: * request header, the user may directly access the
0491: * <code>requestHeaders</code> variable.
0492: * <p>
0493: * If this method is called, it must be called before <code>connect</code>
0494: * is called. Otherwise it will have no effect.
0495: *
0496: * @param key
0497: * The header name.
0498: *
0499: * @param value
0500: * The value for the request header.
0501: *
0502: * @see #requestHeaders
0503: */
0504: public void setRequestHeader(String key, String value) {
0505: requestHeaders.put(key, value);
0506: }
0507:
0508: /**
0509: * Gets an output stream that can be used for uploading data to the
0510: * host.
0511: * <p>
0512: * If this method is called, it must be called before <code>connect</code>
0513: * is called. Otherwise it will have no effect.
0514: * <p>
0515: * Currently the implementation is not as good as it could be. The
0516: * user should avoid uploading huge amounts of data, for some definition
0517: * of huge.
0518: */
0519: public OutputStream getOutputStream() throws IOException {
0520: if (postData == null) {
0521: postData = new ByteArrayOutputStream();
0522: }
0523: return postData;
0524: }
0525:
0526: /**
0527: * Connect to the target host (or proxy), send the request, and read the
0528: * response headers. Any setup routines must be called before the call
0529: * to this method, and routines to examine the result must be called after
0530: * this method.
0531: * <p>
0532: *
0533: * @throws UnknownHostException
0534: * if the target host (or proxy) could not be contacted.
0535: *
0536: * @throws IOException
0537: * if there is a problem writing the HTTP request or reading
0538: * the HTTP response headers.
0539: */
0540: public void connect() throws UnknownHostException, IOException {
0541: if (connected) {
0542: return;
0543: }
0544: connected = true;
0545:
0546: prepareHeaders();
0547: openSocket(true);
0548: try {
0549: try {
0550: sendRequest();
0551: readStatusLine();
0552: } catch (IOException e) {
0553: if (hs.firstTime) {
0554: throw e;
0555: }
0556: closeSocket(false);
0557: openSocket(false);
0558: sendRequest();
0559: readStatusLine();
0560: }
0561: responseHeaders.read(in);
0562:
0563: //System.out.println(status);
0564: //responseHeaders.print(System.out);
0565: //System.out.println();
0566: } catch (IOException e) {
0567: closeSocket(false);
0568: throw e;
0569: }
0570: parseResponse();
0571: }
0572:
0573: void prepareHeaders() {
0574: if (postData != null) {
0575: if (method == null) {
0576: method = "POST";
0577: }
0578: setRequestHeader("Content-Length", Integer
0579: .toString(postData.size()));
0580: }
0581: if (method == null) {
0582: method = "GET";
0583: }
0584:
0585: if (proxyHost == null) {
0586: uri = url.getFile();
0587: connectionHeader = "Connection";
0588: } else {
0589: uri = url.toString();
0590: connectionHeader = "Proxy-Connection";
0591: }
0592:
0593: requestHeaders.putIfNotPresent(connectionHeader, "Keep-Alive");
0594: requestHeaders.putIfNotPresent("Host", host + ":" + port);
0595: }
0596:
0597: void openSocket(boolean reuse) throws IOException {
0598: String targetHost;
0599: int targetPort;
0600:
0601: if (proxyHost != null) {
0602: targetHost = proxyHost;
0603: targetPort = proxyPort;
0604: } else {
0605: targetHost = host;
0606: targetPort = port;
0607: }
0608:
0609: hs = pool.get(targetHost, targetPort, reuse);
0610: under = hs.in;
0611: in = new HttpInputStream(under);
0612: }
0613:
0614: void closeSocket(boolean reuse) {
0615: if (hs != null) {
0616: HttpSocket tmp = hs;
0617: hs = null;
0618:
0619: keepAlive &= reuse;
0620:
0621: /*
0622: * Before we can reuse a keep-alive socket, we must first drain
0623: * the input stream if there is any data left in it. The soft
0624: * 'eof' flag will have been set if we have already read all the
0625: * data that we're supposed to read and the socket is ready to be
0626: * recycled now.
0627: */
0628:
0629: if (keepAlive && !eof) {
0630: new BackgroundCloser(tmp, under, DRAIN_TIMEOUT).start();
0631: } else {
0632: pool.close(tmp, keepAlive);
0633: }
0634: }
0635: }
0636:
0637: class BackgroundCloser extends Thread {
0638: HttpSocket hs;
0639: InputStream in;
0640: int timeout;
0641: Killer killer;
0642:
0643: BackgroundCloser(HttpSocket hs, InputStream in, int timeout) {
0644: this .hs = hs;
0645: this .in = in;
0646: this .timeout = timeout;
0647: }
0648:
0649: public void start() {
0650: killer = new Killer(this );
0651: killer.start();
0652: super .start();
0653: }
0654:
0655: public void run() {
0656: try {
0657: byte[] buf = new byte[4096];
0658:
0659: while (true) {
0660: if (in.read(buf, 0, buf.length) < 0) {
0661: break;
0662: }
0663: }
0664: } catch (IOException e) {
0665: keepAlive = false;
0666: }
0667: pool.close(hs, keepAlive);
0668: killer.interrupt();
0669: }
0670: }
0671:
0672: static class Killer extends Thread {
0673: BackgroundCloser b;
0674: int timeout;
0675:
0676: Killer(BackgroundCloser b) {
0677: this .b = b;
0678: }
0679:
0680: public void run() {
0681: try {
0682: Thread.sleep(b.timeout);
0683: b.interrupt();
0684: } catch (Exception e) {
0685: }
0686: }
0687: }
0688:
0689: void sendRequest() throws IOException {
0690: //System.out.print(method + " " + uri + " " + version + "\r\n");
0691: //requestHeaders.print(System.out);
0692: //System.out.print("\r\n");
0693:
0694: PrintStream p = new PrintStream(hs.out);
0695: p.print(method + " " + uri + " " + version + "\r\n");
0696: requestHeaders.print(p);
0697: p.print("\r\n");
0698:
0699: if (postData != null) {
0700: postData.writeTo(p);
0701: postData = null; // Release memory.
0702: }
0703: p.flush();
0704: }
0705:
0706: void readStatusLine() throws IOException {
0707: while (true) {
0708: status = in.readLine(LINE_LIMIT);
0709: if (status == null) {
0710: throw new EOFException();
0711: }
0712: if (status.startsWith("HTTP/1.1 100")
0713: || status.startsWith("HTTP/1.0 100")) {
0714: /*
0715: * Ignore the "100 Continue" response that some HTTP/1.1
0716: * servers send. We can't depend upon it being sent, because
0717: * we might be talking to an HTTP/1.0 server or an HTTP/1.1
0718: * server that doesn't send the "100 Continue" response, so
0719: * we can't use the response for any decision making, such as
0720: * not sending the post data.
0721: *
0722: * www.u-net.com sends "HTTP/1.0 100 Continue"!
0723: */
0724:
0725: while (true) {
0726: status = in.readLine();
0727: if ((status == null) || (status.length() == 0)) {
0728: break;
0729: }
0730: }
0731: } else if (status.startsWith("HTTP/1.")) {
0732: return;
0733: } else if (status.length() == 0) {
0734: // System.out.println(this + ": got a blank line");
0735: } else if (status.length() == LINE_LIMIT) {
0736: throw new IOException("malformed server response");
0737: } else if (hs.firstTime) {
0738: /*
0739: * Some servers don't send back any headers, even if they
0740: * accept a HTTP/1.0 or greater request! We have to push
0741: * back this line, so it can be re-read as the body.
0742: * Since this is coming back with no headers, the content
0743: * length will be unknown and so the socket will be closed.
0744: */
0745:
0746: // System.out.println("receiving HTTP/0.9 response");
0747: PushbackInputStream pin = new PushbackInputStream(
0748: hs.in, status.length() + 4);
0749:
0750: pin.unread('\n');
0751: pin.unread('\r');
0752: for (int i = status.length(); --i >= 0;) {
0753: pin.unread(status.charAt(i));
0754: }
0755:
0756: /*
0757: * And push back a blank line, so the user thinks it got to
0758: * the end of the headers
0759: */
0760: pin.unread('\n');
0761: pin.unread('\r');
0762:
0763: status = "HTTP/1.0 200 OK";
0764: hs.in = pin;
0765: under = pin;
0766: in = new HttpInputStream(under);
0767: break;
0768: } else {
0769: /*
0770: * If we see funny responses (missing headers, etc.) from a
0771: * socket that we've reused, then we probably got out of sync
0772: * with the remote host (e.g., didn't read enough from the
0773: * last response), and should abort this request.
0774: */
0775:
0776: throw new IOException("malformed server response");
0777: }
0778: }
0779: }
0780:
0781: void parseResponse() {
0782: String str;
0783:
0784: str = getResponseHeader(connectionHeader);
0785: if (str != null) {
0786: keepAlive = str.equalsIgnoreCase("Keep-Alive");
0787: } else if (status.startsWith("HTTP/1.1")) {
0788: keepAlive = true;
0789: } else {
0790: keepAlive = false;
0791: }
0792:
0793: str = getResponseHeader("Transfer-Encoding");
0794: if ((str != null) && str.equals("chunked")) {
0795: under = new UnchunkingInputStream(this );
0796: in = new RecycleInputStream(this , under);
0797: return;
0798: }
0799:
0800: int contentLength = getContentLength();
0801: if (contentLength < 0) {
0802: /*
0803: * Some servers leave off the content length for return codes
0804: * known to require no content.
0805: */
0806:
0807: if (status.indexOf("304") > 0 || status.indexOf("204") > 0) {
0808: responseHeaders.put("Content-Length", "0");
0809: contentLength = 0;
0810: }
0811: }
0812:
0813: if ((contentLength == 0) || method.equals("HEAD")) {
0814: under = new NullInputStream();
0815: in = new HttpInputStream(under);
0816: closeSocket(keepAlive);
0817: } else if (contentLength > 0) {
0818: under = new LimitInputStream(this , contentLength);
0819: in = new RecycleInputStream(this , under);
0820: } else {
0821: keepAlive = false;
0822: in = new RecycleInputStream(this , under);
0823: }
0824: }
0825:
0826: /**
0827: * Gets an input stream that can be used to read the body of the
0828: * HTTP response. Unlike the other convenience methods for accessing
0829: * the HTTP response, this one automatically connects to the
0830: * target host if not already connected.
0831: * <p>
0832: * The input stream that <code>getInputStream</code> provides
0833: * automatically hides the differences between "Content-Length", no
0834: * "Content-Length", and "chunked" for HTTP/1.0 and HTTP/1.1 responses.
0835: * In all cases, the user can simply read until reaching the end of the
0836: * input stream, which signifies that all the available data from this
0837: * request has been read. (If reading from a "chunked" source, the data
0838: * is automatically de-chunked as it is presented to the user. There is
0839: * no way to access the raw underlying stream that contains the HTTP/1.1
0840: * chunking packets.)
0841: *
0842: * @throws IOException
0843: * if there is problem connecting to the target.
0844: *
0845: * @see #connect
0846: */
0847: public HttpInputStream getInputStream() throws IOException {
0848: connect();
0849: return in;
0850: }
0851:
0852: /**
0853: * Gracefully closes this HTTP request when user is done with it.
0854: * <p>
0855: * The user can either call this method or <code>close</code> on the
0856: * input stream obtained from the <code>getInputStream</code>
0857: * method -- the results are the same.
0858: * <p>
0859: * When all the response data is read from the input stream, the
0860: * input stream is automatically closed (recycled). If the user is
0861: * not going to read all the response data from input stream, the user
0862: * must call <code>close</code> to
0863: * release the resources associated with the open request. Otherwise
0864: * the program may consume all available sockets, waiting forever for
0865: * the user to finish reading.
0866: * <p>
0867: * Note that the input stream is automatically closed if the input
0868: * stream throws an exception while reading.
0869: * <p>
0870: * In order to interrupt a pending I/O operation in another thread
0871: * (for example, to stop a request that is taking too long), the user
0872: * should call <code>disconnect</code> or interrupt the blocked thread.
0873: * The user should not call <code>close</code> in this case because
0874: * <code>close</code> will not interrupt the pending I/O operation.
0875: * <p>
0876: * Closing the request multiple times is allowed.
0877: * <p>
0878: * In order to make sure that open sockets are not left lying around
0879: * the user should use code similar to the following:
0880: * <pre>
0881: * OutputStream out = ...
0882: * HttpRequest http = new HttpRequest("http://bob.com/index.html");
0883: * try {
0884: * HttpInputStream in = http.getInputStream();
0885: * in.copyTo(out);
0886: * } finally {
0887: * // Copying to "out" could have failed. Close "http" in case
0888: * // not all the data has been read from it yet.
0889: * http.close();
0890: * }
0891: * </pre>
0892: */
0893:
0894: public void close() {
0895: closeSocket(true);
0896: }
0897:
0898: /**
0899: * Interrupts this HTTP request. Can be used to halt an in-progress
0900: * HTTP request from another thread, by causing it to
0901: * throw an <code>InterruptedIOException</code> during the connect
0902: * or while reading from the input stream, depending upon what state
0903: * this HTTP request is in when it is disconnected.
0904: *
0905: * @see #close
0906: */
0907: public void disconnect() {
0908: closeSocket(false);
0909: }
0910:
0911: /**
0912: * Gets the HTTP response status code. From responses like:
0913: * <pre>
0914: * HTTP/1.0 200 OK
0915: * HTTP/1.0 401 Unauthorized
0916: * </pre>
0917: * this method extracts the integers <code>200</code> and <code>401</code>
0918: * respectively. Returns <code>-1</code> if the response status code
0919: * was malformed.
0920: * <p>
0921: * If this method is called, it must be called after <code>connect</code>
0922: * has been called. Otherwise the information is not yet available and
0923: * this method will return <code>-1</code>.
0924: * <p>
0925: * For advanced features, the user can directly access the
0926: * <code>status</code> variable.
0927: *
0928: * @return The integer status code from the HTTP response.
0929: *
0930: * @see #connect
0931: * @see #status
0932: */
0933: public int getResponseCode() {
0934: try {
0935: int start = status.indexOf(' ') + 1;
0936: int end = status.indexOf(' ', start + 1);
0937: if (end < 0) {
0938: /*
0939: * Sometimes the status line has the status code but no
0940: * status phrase.
0941: */
0942: end = status.length();
0943: }
0944: return Integer.parseInt(status.substring(start, end));
0945: } catch (Exception e) {
0946: return -1;
0947: }
0948: }
0949:
0950: /**
0951: * Gets the value associated with the given case-insensitive header name
0952: * from the HTTP response.
0953: * <p>
0954: * If this method is called, it must be called after <code>connect</code>
0955: * has been called. Otherwise the information is not available and
0956: * this method will return <code>null</code>.
0957: * <p>
0958: * For advanced features, such as enumerating over all response headers,
0959: * the user should directly access the <code>responseHeaders</code>
0960: * variable.
0961: *
0962: * @param key
0963: * The case-insensitive name of the response header.
0964: *
0965: * @return The value associated with the given name, or <code>null</code>
0966: * if there is no such header in the response.
0967: *
0968: * @see #connect
0969: * @see #responseHeaders
0970: */
0971: public String getResponseHeader(String key) {
0972: return responseHeaders.get(key);
0973: }
0974:
0975: /**
0976: * Convenience method to get the "Content-Length" header from the
0977: * HTTP response.
0978: * <p>
0979: * If this method is called, it must be called after <code>connect</code>
0980: * has been called. Otherwise the information is not available and
0981: * this method will return <code>-1</code>.
0982: *
0983: * @return The content length specified in the response headers, or
0984: * <code>-1</code> if the length was not specified or malformed
0985: * (not a number).
0986: *
0987: * @see #connect
0988: * @see #getResponseHeader
0989: */
0990: public int getContentLength() {
0991: try {
0992: return Integer.parseInt(responseHeaders
0993: .get("Content-Length"));
0994: } catch (Exception e) {
0995: return -1;
0996: }
0997: }
0998:
0999: /**
1000: * Removes all the point-to-point (hop-by-hop) headers from
1001: * the given mime headers.
1002: *
1003: * @param headers
1004: * The mime headers to be modified.
1005: *
1006: * @param response
1007: * <code>true</code> to remove the point-to-point <b>response</b>
1008: * headers, <code>false</code> to remove the point-to-point
1009: * <b>request</b> headers.
1010: *
1011: * @see <a href="http://www.cis.ohio-state.edu/htbin/rfc/rfc2068.html">RFC 2068</a>
1012: */
1013: public static void removePointToPointHeaders(MimeHeaders headers,
1014: boolean response) {
1015: headers.remove("Connection");
1016: headers.remove("Proxy-Connection");
1017: headers.remove("Keep-Alive");
1018: headers.remove("Upgrade");
1019:
1020: if (response == false) {
1021: headers.remove("Proxy-Authorization");
1022: } else {
1023: headers.remove("Proxy-Authenticate");
1024: headers.remove("Public");
1025: headers.remove("Transfer-Encoding");
1026: }
1027: }
1028:
1029: }
1030:
1031: class RecycleInputStream extends HttpInputStream {
1032: HttpRequest target;
1033: boolean closed;
1034:
1035: public RecycleInputStream(HttpRequest target, InputStream in) {
1036: super (in);
1037: this .target = target;
1038: }
1039:
1040: /**
1041: * Reads from the underlying input stream, which might be a raw
1042: * input stream, a limit input stream, or an unchunking input stream.
1043: * If we get EOF or there is an error reading, close the socket.
1044: */
1045: public int read() throws IOException {
1046: if (closed) {
1047: return -1;
1048: }
1049: try {
1050: int ch = in.read();
1051: if (ch < 0) {
1052: close(false);
1053: }
1054: return ch;
1055: } catch (IOException e) {
1056: close(false);
1057: throw e;
1058: }
1059: }
1060:
1061: public int read(byte[] buf, int off, int len) throws IOException {
1062: if (closed) {
1063: return -1;
1064: }
1065: try {
1066: int count = in.read(buf, off, len);
1067: if (count < 0) {
1068: close(false);
1069: }
1070: return count;
1071: } catch (IOException e) {
1072: close(false);
1073: throw e;
1074: }
1075: }
1076:
1077: private void close(boolean reuse) {
1078: if (closed == false) {
1079: closed = true;
1080: target.closeSocket(reuse);
1081: }
1082: }
1083:
1084: public void close() {
1085: close(true);
1086: }
1087: }
1088:
/**
 * An input stream that holds no data: every read reports end of stream.
 */
class NullInputStream extends InputStream {
    /**
     * @return  always <code>-1</code> (end of stream).
     */
    public int read() {
        return -1;
    }

    /**
     * Bulk read.  This is the method that actually overrides
     * <code>InputStream.read(byte[], int, int)</code>; nothing is ever
     * stored into <code>buf</code>.  Because the stream is permanently at
     * end of stream, <code>-1</code> is returned even when
     * <code>len</code> is zero.
     *
     * @return  always <code>-1</code> (end of stream).
     */
    public int read(byte[] buf, int off, int len) {
        return -1;
    }

    /**
     * Overload taking a <code>char</code> array.  It does not override
     * anything in <code>InputStream</code> and is kept only so that any
     * existing caller of this signature still compiles.
     *
     * @return  always <code>-1</code>.
     */
    public int read(char[] buf, int off, int len) {
        return -1;
    }
}
1098:
1099: class LimitInputStream extends HttpInputStream {
1100: HttpRequest target;
1101: int limit;
1102:
1103: public LimitInputStream(HttpRequest target, int limit) {
1104: super (target.hs.in);
1105: this .target = target;
1106: this .limit = limit;
1107: }
1108:
1109: public int read() throws IOException {
1110: if (limit <= 0) {
1111: return -1;
1112: }
1113:
1114: int ch = in.read();
1115: if ((ch >= 0) && (--limit <= 0)) {
1116: target.eof = true;
1117: target.closeSocket(true);
1118: }
1119: return ch;
1120: }
1121:
1122: public int read(byte[] buf, int off, int len) throws IOException {
1123: if (limit <= 0) {
1124: return -1;
1125: }
1126:
1127: len = Math.min(len, limit);
1128: int count = in.read(buf, off, len);
1129: if (count < 0) {
1130: limit = 0;
1131: return -1;
1132: }
1133: limit -= count;
1134: if (limit <= 0) {
1135: target.eof = true;
1136: target.closeSocket(true);
1137: }
1138: return count;
1139: }
1140: }
1141:
1142: class UnchunkingInputStream extends HttpInputStream {
1143: HttpRequest target;
1144: boolean eof;
1145: int bytesLeft;
1146:
1147: public UnchunkingInputStream(HttpRequest target) {
1148: super (target.in);
1149: this .target = target;
1150: }
1151:
1152: public int read() throws IOException {
1153: if ((bytesLeft <= 0) && (getChunkSize() == false)) {
1154: return -1;
1155: }
1156: bytesLeft--;
1157: return in.read();
1158: }
1159:
1160: public int read(byte[] buf, int off, int len) throws IOException {
1161: int total = 0;
1162: while (true) {
1163: if ((bytesLeft <= 0) && (getChunkSize() == false)) {
1164: break;
1165: }
1166: int count = super .read(buf, off, Math.min(bytesLeft, len));
1167: total += count;
1168: off += count;
1169: bytesLeft -= count;
1170: len -= count;
1171:
1172: if ((len <= 0) || (available() == 0)) {
1173: break;
1174: }
1175: }
1176:
1177: return (total == 0) ? -1 : total;
1178: }
1179:
1180: private boolean getChunkSize() throws IOException {
1181: if (eof) {
1182: return false;
1183: }
1184:
1185: /*
1186: * Although HTTP/1.1 chunking spec says that there is one "\r\n"
1187: * between chunks, some servers (for example, maps.yahoo.com)
1188: * send more than one blank line between chunks. So, read and skip
1189: * all the blank lines seen between chunks.
1190: */
1191:
1192: String line;
1193: do {
1194: // Sanity check: limit chars when expecting a chunk size.
1195:
1196: line = ((HttpInputStream) in)
1197: .readLine(HttpRequest.LINE_LIMIT);
1198: } while ((line != null) && (line.length() == 0));
1199:
1200: try {
1201: bytesLeft = Integer.parseInt(line.trim(), 16);
1202: } catch (Exception e) {
1203: throw new IOException("malformed chunk");
1204: }
1205: if (bytesLeft == 0) {
1206: eof = true;
1207: target.responseTrailers = new MimeHeaders(
1208: (HttpInputStream) in);
1209: target.eof = true;
1210: target.closeSocket(true);
1211: return false;
1212: }
1213:
1214: return true;
1215: }
1216: }
1217:
1218: class SimpleHttpSocketPool implements Runnable, HttpSocketPool {
1219: public int maxIdle = 10; // size of the socket pool
1220: public int maxAge = 20000; // max age of idle socket (mseconds)
1221: public int reapInterval = 10000;// interval (in msec) to run reaper thread
1222:
1223: // pool of idle connections
1224: Vector idle = new Vector();
1225:
1226: /**
1227: * Start the background thread that removes old connections
1228: */
1229:
1230: Thread reaper;
1231:
1232: public SimpleHttpSocketPool() {
1233: reaper = new Thread(this );
1234: reaper.setDaemon(true);
1235: reaper.start();
1236: }
1237:
1238: /**
1239: * Get a potentially "pooled" target object.
1240: * Call this instead of the constructor to use the pool.
1241: * @param host the target content server (or web proxy)
1242: * @param port target web server port
1243: * @param proxy if true, use telnet passthru mode.
1244: */
1245:
1246: public HttpSocket get(String host, int port, boolean reuse)
1247: throws IOException, UnknownHostException {
1248: host = host.toLowerCase();
1249:
1250: if (reuse) {
1251: synchronized (idle) {
1252: /*
1253: * Start at end to reuse the most recent socket, which is
1254: * hopefully the most likely to still be alive.
1255: */
1256:
1257: int i = idle.size();
1258: while (--i >= 0) {
1259: HttpSocket hs = (HttpSocket) idle.elementAt(i);
1260: if (hs.host.equals(host) && (hs.port == port)) {
1261: idle.removeElementAt(i);
1262: /*System.out.println("reusing:" + hs);*/
1263: hs.timesUsed++;
1264: return hs;
1265: }
1266: }
1267: }
1268: }
1269:
1270: HttpSocket hs = new HttpSocket(host, port);
1271:
1272: /*System.out.println("new:" + hs);*/
1273:
1274: return hs;
1275: }
1276:
1277: public void close(HttpSocket hs, boolean reuse) {
1278: if (reuse) {
1279: /*System.out.println("recycling: " + hs);*/
1280: synchronized (idle) {
1281: if (idle.size() >= maxIdle) {
1282: HttpSocket bump = (HttpSocket) idle.firstElement();
1283: idle.removeElementAt(0);
1284: bump.close();
1285: }
1286: hs.firstTime = false;
1287: hs.lastUsed = System.currentTimeMillis();
1288: idle.addElement(hs);
1289: }
1290: } else {
1291: /*System.out.println("closing: " + hs);*/
1292: hs.close();
1293: }
1294: }
1295:
1296: int lastSize = -1;
1297:
1298: public void run() {
1299: while (true) {
1300: try {
1301: Thread.sleep(reapInterval);
1302: } catch (InterruptedException e) {
1303: break;
1304: }
1305:
1306: /*
1307: * expire after age seconds
1308: */
1309:
1310: long expired = System.currentTimeMillis() - maxAge;
1311: boolean any = false;
1312: synchronized (idle) {
1313: while (idle.size() > 0) {
1314: HttpSocket hs = (HttpSocket) idle.firstElement();
1315: if (hs.lastUsed >= expired) {
1316: break;
1317: }
1318: any = true;
1319: idle.removeElementAt(0);
1320: hs.close();
1321: }
1322: }
1323:
1324: if (false) {
1325: if (idle.size() > 0 || lastSize != 0) {
1326: long now = System.currentTimeMillis();
1327: System.out.print("socket cache:");
1328: for (int i = 0; i < idle.size(); i++) {
1329: HttpSocket hs = (HttpSocket) idle.elementAt(i);
1330: System.out.print(" (" + hs + " "
1331: + (now - hs.lastUsed) / 1000 + ")");
1332: }
1333: System.out.println();
1334: lastSize = idle.size();
1335: }
1336: }
1337: }
1338: }
1339:
1340: public String toString() {
1341: if (idle == null) {
1342: return "(null)";
1343: }
1344: StringBuffer sb = new StringBuffer();
1345: for (int i = 0; i < idle.size(); i++) {
1346: HttpSocket hs = (HttpSocket) idle.elementAt(i);
1347: sb.append(hs.toString() + ", ");
1348: }
1349: return sb.toString();
1350: }
1351: }
|