001: /*
002: * @(#)RedirectionModule.java 0.3-2 18/06/1999
003: *
004: * This file is part of the HTTPClient package
005: * Copyright (C) 1996-1999 Ronald Tschalär
006: *
007: * This library is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU Lesser General Public
009: * License as published by the Free Software Foundation; either
010: * version 2 of the License, or (at your option) any later version.
011: *
012: * This library is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015: * Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public
018: * License along with this library; if not, write to the Free
019: * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
020: * MA 02111-1307, USA
021: *
022: * For questions, suggestions, bug-reports, enhancement-requests etc.
023: * I may be contacted at:
024: *
025: * ronald@innovation.ch
026: *
027: */
028:
029: package HTTPClient;
030:
031: import java.net.InetAddress;
032: import java.net.ProtocolException;
033: import java.net.UnknownHostException;
034: import java.io.IOException;
035: import java.util.Hashtable;
036:
037: /**
038: * This module handles the redirection status codes 301, 302, 303, 305, 306
039: * and 307.
040: *
041: * @version 0.3-2 18/06/1999
042: * @author Ronald Tschalär
043: */
044:
045: class RedirectionModule implements HTTPClientModule, GlobalConstants {
046: /** a list of permanent redirections (301) */
047: private static Hashtable perm_redir_cntxt_list = new Hashtable();
048:
049: /** the level of redirection */
050: private int level;
051:
052: /** the url used in the last redirection */
053: private URI lastURI;
054:
055: // Constructors
056:
057: /**
058: * Start with level 0.
059: */
060: RedirectionModule() {
061: level = 0;
062: lastURI = null;
063: }
064:
065: // Methods
066:
067: /**
068: * Invoked by the HTTPClient.
069: */
070: public int requestHandler(Request req, Response[] resp) {
071: HTTPConnection con = req.getConnection();
072: URI new_loc, cur_loc;
073:
074: try {
075: cur_loc = new URI(con.getProtocol(), con.getHost(), con
076: .getPort(), req.getRequestURI());
077: } catch (ParseException pe) {
078: throw new Error(
079: "HTTPClient Internal Error: unexpected exception '"
080: + pe + "'");
081: }
082:
083: // handle permanent redirections
084:
085: Hashtable perm_redir_list = Util.getList(perm_redir_cntxt_list,
086: req.getConnection().getContext());
087: if ((new_loc = (URI) perm_redir_list.get(cur_loc)) != null) {
088: /* copy query if present in old url but not in new url. This
089: * isn't strictly conforming, but some scripts fail to properly
090: * propagate the query string to the Location header.
091: *
092: * Unfortunately it looks like we're fucked either way: some
093: * scripts fail if you don't propagate the query string, some
094: * fail if you do... God, don't you just love it when people
095: * can't read a spec? Anway, since we can't get it right for
096: * all scripts we opt to follow the spec.
097: String nres = new_loc.getPath(),
098: oquery = Util.getQuery(req.getRequestURI()),
099: nquery = Util.getQuery(nres);
100: if (nquery == null && oquery != null)
101: nres += "?" + oquery;
102: */
103: String nres = new_loc.getPath();
104: req.setRequestURI(nres);
105:
106: try {
107: lastURI = new URI(new_loc, nres);
108: } catch (ParseException pe) {
109: }
110:
111: if (DebugMods)
112: System.err
113: .println("RdirM: matched request in permanent "
114: + "redirection list - redoing request to "
115: + lastURI);
116:
117: if (!sameServer(con, new_loc)) {
118: try {
119: con = new HTTPConnection(new_loc.toURL());
120: } catch (Exception e) {
121: throw new Error(
122: "HTTPClient Internal Error: unexpected "
123: + "exception '" + e + "'");
124: }
125:
126: con.setContext(req.getConnection().getContext());
127: req.setConnection(con);
128: return REQ_NEWCON_RST;
129: } else {
130: return REQ_RESTART;
131: }
132: }
133:
134: return REQ_CONTINUE;
135: }
136:
137: /**
138: * Invoked by the HTTPClient.
139: */
140: public void responsePhase1Handler(Response resp, RoRequest req)
141: throws IOException {
142: int sts = resp.getStatusCode();
143: if (sts < 301 || sts > 307 || sts == 304) {
144: if (lastURI != null) // it's been redirected
145: resp.setEffectiveURI(lastURI);
146: }
147: }
148:
149: /**
150: * Invoked by the HTTPClient.
151: */
152: public int responsePhase2Handler(Response resp, Request req)
153: throws IOException {
154: /* handle various response status codes until satisfied */
155:
156: int sts = resp.getStatusCode();
157: switch (sts) {
158: case 302: // General (temporary) Redirection (handle like 303)
159:
160: if (DebugMods)
161: System.err.println("RdirM: Received status: " + sts
162: + " " + resp.getReasonLine()
163: + " - treating as 303");
164:
165: sts = 303;
166:
167: case 301: // Moved Permanently
168: case 303: // See Other (use GET)
169: case 307: // Moved Temporarily (we mean it!)
170:
171: if (DebugMods)
172: System.err.println("RdirM: Handling status: " + sts
173: + " " + resp.getReasonLine());
174:
175: // the spec says automatic redirection may only be done if
176: // the second request is a HEAD or GET.
177: if (!req.getMethod().equals("GET")
178: && !req.getMethod().equals("HEAD") && sts != 303) {
179: if (DebugMods)
180: System.err.println("RdirM: not redirected because "
181: + "method is neither HEAD nor GET");
182:
183: if (sts == 301 && resp.getHeader("Location") != null)
184: update_perm_redir_list(req, resLocHdr(resp
185: .getHeader("Location"), req));
186:
187: resp.setEffectiveURI(lastURI);
188: return RSP_CONTINUE;
189: }
190:
191: case 305: // Use Proxy
192: case 306: // Switch Proxy
193:
194: if (DebugMods)
195: if (sts == 305 || sts == 306)
196: System.err.println("RdirM: Handling status: " + sts
197: + " " + resp.getReasonLine());
198:
199: // Don't accept 305 from a proxy
200: if (sts == 305
201: && req.getConnection().getProxyHost() != null) {
202: if (DebugMods)
203: System.err.println("RdirM: 305 ignored because "
204: + "a proxy is already in use");
205:
206: resp.setEffectiveURI(lastURI);
207: return RSP_CONTINUE;
208: }
209:
210: /* the level is a primitive way of preventing infinite
211: * redirections. RFC-2068 set the max to 5, but the latest
212: * http draft has loosened this. Since some sites (notably
213: * M$) need more levels, this is now set to the (arbitrary)
214: * value of 15 (god only knows why they need to do even 5
215: * redirections...).
216: */
217: if (level == 15 || resp.getHeader("Location") == null) {
218: if (DebugMods) {
219: if (level == 15)
220: System.err
221: .println("RdirM: not redirected because "
222: + "of too many levels of redirection");
223: else
224: System.err
225: .println("RdirM: not redirected because "
226: + "no Location header was present");
227: }
228:
229: resp.setEffectiveURI(lastURI);
230: return RSP_CONTINUE;
231: }
232: level++;
233:
234: URI loc = resLocHdr(resp.getHeader("Location"), req);
235:
236: if (req.getStream() != null && (sts == 306 || sts == 305))
237: return RSP_CONTINUE;
238:
239: HTTPConnection mvd;
240: boolean new_con = false;
241: String nres;
242:
243: if (sts == 305) {
244: mvd = new HTTPConnection(req.getConnection()
245: .getProtocol(), req.getConnection().getHost(),
246: req.getConnection().getPort());
247: mvd.setCurrentProxy(loc.getHost(), loc.getPort());
248: mvd.setContext(req.getConnection().getContext());
249: new_con = true;
250:
251: nres = req.getRequestURI();
252:
253: /* There was some discussion about this, and especially
254: * Foteos Macrides (Lynx) said a 305 should also imply
255: * a change to GET (for security reasons) - see the thread
256: * starting at
257: * http://www.ics.uci.edu/pub/ietf/http/hypermail/1997q4/0351.html
258: * However, this is not in the latest draft, but since I
259: * agree with Foteos we do it anyway...
260: */
261: req.setMethod("GET");
262: req.setData(null);
263: req.setStream(null);
264: } else if (sts == 306) {
265: // We'll have to wait for Josh to create a new spec here.
266: return RSP_CONTINUE;
267: } else {
268: if (sameServer(req.getConnection(), loc)) {
269: mvd = req.getConnection();
270: nres = loc.getPath();
271: } else {
272: try {
273: mvd = new HTTPConnection(loc.toURL());
274: nres = loc.getPath();
275: } catch (Exception e) {
276: if (req.getConnection().getProxyHost() == null
277: || !loc.getScheme().equalsIgnoreCase(
278: "ftp"))
279: return RSP_CONTINUE;
280:
281: // We're using a proxy and the protocol is ftp -
282: // maybe the proxy will also proxy ftp...
283: mvd = new HTTPConnection("http", req
284: .getConnection().getProxyHost(), req
285: .getConnection().getProxyPort());
286: mvd.setCurrentProxy(null, 0);
287: nres = loc.toExternalForm();
288: }
289:
290: mvd.setContext(req.getConnection().getContext());
291: new_con = true;
292: }
293:
294: /* copy query if present in old url but not in new url.
295: * This isn't strictly conforming, but some scripts fail
296: * to propagate the query properly to the Location
297: * header.
298: *
299: * See comment on line 99.
300: String oquery = Util.getQuery(req.getRequestURI()),
301: nquery = Util.getQuery(nres);
302: if (nquery == null && oquery != null)
303: nres += "?" + oquery;
304: */
305:
306: if (sts == 303 && !req.getMethod().equals("HEAD")) {
307: // 303 means "use GET"
308:
309: req.setMethod("GET");
310: req.setData(null);
311: req.setStream(null);
312: } else if (sts == 301) {
313: // update permanent redirection list
314: try {
315: update_perm_redir_list(req, new URI(loc, nres));
316: } catch (ParseException pe) { /* ??? */
317: }
318:
319: }
320:
321: // Adjust Referer, if present
322: NVPair[] hdrs = req.getHeaders();
323: for (int idx = 0; idx < hdrs.length; idx++)
324: if (hdrs[idx].getName().equalsIgnoreCase("Referer")) {
325: HTTPConnection con = req.getConnection();
326: hdrs[idx] = new NVPair("Referer", con
327: + req.getRequestURI());
328: break;
329: }
330: }
331:
332: req.setConnection(mvd);
333: req.setRequestURI(nres);
334:
335: try {
336: resp.getInputStream().close();
337: } catch (IOException ioe) {
338: }
339:
340: if (sts != 305 && sts != 306) {
341: try {
342: lastURI = new URI(loc, nres);
343: } catch (ParseException pe) { /* ??? */
344: }
345:
346: if (DebugMods)
347: System.err.println("RdirM: request redirected to "
348: + lastURI + " using method "
349: + req.getMethod());
350: } else {
351: if (DebugMods)
352: System.err
353: .println("RdirM: resending request using "
354: + "proxy " + mvd.getProxyHost()
355: + ":" + mvd.getProxyPort());
356: }
357:
358: if (new_con)
359: return RSP_NEWCON_REQ;
360: else
361: return RSP_REQUEST;
362:
363: default:
364:
365: return RSP_CONTINUE;
366: }
367: }
368:
369: /**
370: * Invoked by the HTTPClient.
371: */
372: public void responsePhase3Handler(Response resp, RoRequest req) {
373: }
374:
375: /**
376: * Invoked by the HTTPClient.
377: */
378: public void trailerHandler(Response resp, RoRequest req) {
379: }
380:
381: /**
382: * Update the permanent redirection list.
383: *
384: * @param the original request
385: * @param the new location
386: */
387: private static void update_perm_redir_list(RoRequest req,
388: URI new_loc) {
389: HTTPConnection con = req.getConnection();
390: URI cur_loc = null;
391: try {
392: cur_loc = new URI(con.getProtocol(), con.getHost(), con
393: .getPort(), req.getRequestURI());
394: } catch (ParseException pe) {
395: }
396:
397: if (!cur_loc.equals(new_loc)) {
398: Hashtable perm_redir_list = Util.getList(
399: perm_redir_cntxt_list, con.getContext());
400: perm_redir_list.put(cur_loc, new_loc);
401: }
402: }
403:
404: /**
405: * The Location header field must be an absolute URI, but too many broken
406: * servers use relative URIs. So, try as an absolute URI, and if that
407: * fails try as a relative URI.
408: *
409: * @param loc the Location header field
410: * @param req the Request to resolve relative URI's relative to
411: * @return an absolute URI corresponding to the Location header field
412: * @ throws ProtocolException if the Location header field is completely
413: * unparseable
414: */
415: private URI resLocHdr(String loc, RoRequest req)
416: throws ProtocolException {
417: try {
418: return new URI(loc);
419: } catch (ParseException pe) {
420: // it might be a relative URL (i.e. another broken server)
421: try {
422: URI base = new URI(req.getConnection().getProtocol(),
423: req.getConnection().getHost(), req
424: .getConnection().getPort(), req
425: .getRequestURI());
426: return new URI(base, loc);
427: } catch (ParseException pe2) {
428: throw new ProtocolException(
429: "Malformed URL in Location " + "header: " + loc);
430: }
431: }
432: }
433:
434: /**
435: * Tries to determine as best as possible if <var>url</var> refers
436: * to the same server as <var>con</var> is talking with.
437: *
438: * @param con the HTTPConnection
439: * @param url the http URL
440: * @return true if the url refers to the same server as the connection,
441: * false otherwise.
442: */
443: private boolean sameServer(HTTPConnection con, URI url) {
444: if (!url.getScheme().equalsIgnoreCase(con.getProtocol()))
445: return false;
446:
447: /* we can't do this, because otherwise a server can't redirect to
448: * a new host name (that resolves to the same ip-address as the
449: * old host name).
450: try
451: {
452: compAddr: if (!url.getHost().equalsIgnoreCase(con.getHost()))
453: {
454: InetAddress[] list1 = InetAddress.getAllByName(url.getHost());
455: InetAddress[] list2 = InetAddress.getAllByName(con.getHost());
456: for (int idx1=0; idx1<list1.length; idx1++)
457: for (int idx2=0; idx2<list2.length; idx2++)
458: if (list1[idx1].equals(list2[idx2]))
459: break compAddr;
460: return false;
461: }
462: }
463: catch (UnknownHostException uhe)
464: { return false; }
465: */
466: if (!url.getHost().equalsIgnoreCase(con.getHost()))
467: return false;
468:
469: if (url.getPort() != con.getPort())
470: return false;
471:
472: return true;
473: }
474: }
|