001: /*
002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005: package com.sun.portal.portlet.rssportlet.filecache;
006:
007: import java.util.List;
008: import java.util.ArrayList;
009: import java.util.Vector;
010: import java.util.Iterator;
011: import java.util.StringTokenizer;
012: import java.util.Enumeration;
013: import java.util.ResourceBundle;
014: import java.util.Properties;
015:
016: import java.io.InputStream;
017: import java.io.IOException;
018: import java.io.UnsupportedEncodingException;
019:
020: import java.net.URL;
021: import java.net.URLEncoder;
022: import java.net.URLConnection;
023: import java.net.HttpURLConnection;
024: import java.net.MalformedURLException;
025:
026: import javax.servlet.http.HttpServletRequest;
027: import javax.servlet.http.HttpServletResponse;
028: import javax.servlet.http.HttpUtils;
029: import javax.servlet.http.Cookie;
030:
031: import com.sun.portal.rewriter.RewriterPool;
032: import com.sun.portal.rewriter.RewriterModule;
033: import com.sun.portal.rewriter.Rewriter;
034: import com.sun.portal.rewriter.Translator;
035: import com.sun.portal.rewriter.AbsoluteTranslator;
036: import com.sun.portal.rewriter.util.uri.PageSpec;
037: import com.sun.portal.rewriter.rom.InvalidXMLException;
038:
039: /**
040: * <P>
041: *
042: * This class fetches and stores the content from an arbitrary url
043: *
044: *@author Administrator
045: *@created February 23, 2004
046: */
047:
048: class Fetcher extends Thread {
049:
/**
 * Initial capacity intended for the buffer that stores the retrieved
 * content. Sort of arbitrary; nothing in this class currently reads it.
 */
private static final int CONTENT_BUFFER_SIZE = 2048;

/** Result of the fetch; stays null until run() assigns it. */
private StringBuffer content = null;
/** Decoded response body before any rewriting is applied. */
private String data = null;
/** Location that is fetched; replaced when a redirect is followed. */
private URL url = null;
/** URL of the incoming portal request. */
private URL requestURL = null;

// Both flags are written by one thread and polled through
// isTerminated()/isFinished(), presumably from a different thread, so
// they are volatile to make each update visible to the reader.
private volatile boolean terminated = false;
private volatile boolean finished = false;

/** Value of the "Fetcher.cookiesToForwardAll" property. */
private boolean allCookies = false;
/** Cookie names read from the "Fetcher.cookiesToForwardList" property. */
private List cookiesToForwardList = null;
private HttpServletRequest request;
private HttpServletResponse response;
private Properties props = null;
private String rulesetID = null;
private String contentType = null;
private FileCache filecache = null;

/**
 * Lower-case names of the request headers that setHeaders() does not copy
 * to the outgoing connection.
 */
private static final List ignoreHeaders = new ArrayList();

static {
    // keep it all in lowercase
    ignoreHeaders.add("host");
    ignoreHeaders.add("cookie");
    ignoreHeaders.add("connection");
    ignoreHeaders.add("content-length");
    ignoreHeaders.add("accept-encoding");
    ignoreHeaders.add("location");
}
083:
084: /**
085: * <P>
086: *
087: * Constructor method
088: *
089: *@param u URL to fetch from
090: *@param p Current object
091: *@param req HttpServletRequest
092: *@param res HttpServletResponse
093: *@param rb Description of the Parameter
094: *@param rsid Description of the Parameter
095: *@exception MalformedURLException
096: *@exception ProviderException
097: */
098: public Fetcher(String u, FileCache filecache,
099: HttpServletRequest req, HttpServletResponse res, String rsid)
100: throws MalformedURLException {
101: this .filecache = filecache;
102: filecache.log("Fetcher(): constructing Fetcher()");
103: request = req;
104: response = res;
105: //requestURL = new URL(context.getRequestServer(request).toString());
106: requestURL = new URL(HttpUtils.getRequestURL(req).toString());
107: filecache.log("Fetcher(): requestURL=" + requestURL.toString());
108: filecache.log("Fetcher(): getAbsURL() returns " + getAbsURL(u));
109: url = new URL(getAbsURL(u));
110: this .props = filecache.props;
111: rulesetID = rsid;
112: filecache.log("Fetcher(): rulesetID=" + rulesetID);
113: Boolean allCookiesValue = new Boolean(props
114: .getProperty("Fetcher.cookiesToForwardAll"));
115: allCookies = allCookiesValue.booleanValue();
116: //Retrieve a colon delimited list of cookies to forward and then add them to a List
117: String cookiesList = props
118: .getProperty("Fetcher.cookiesToForwardList");
119: StringTokenizer tok = new StringTokenizer(cookiesList, ":");
120: while (tok.hasMoreTokens()) {
121: cookiesToForwardList.add(tok.nextToken());
122: }
123:
124: setDaemon(true);
125: filecache.log("Fetcher(): end constructor");
126: }
127:
128: /**
129: * <P>
130: *
131: * Return the contents of the HTML page
132: *
133: *@return content
134: *@exception ProviderException
135: */
136: public StringBuffer getContent() {
137: return content;
138: }
139:
140: /**
141: * <P>
142: *
143: * Check whether reading from the URL is complete
144: *
145: *@return finished (true/false)
146: */
147: public boolean isFinished() {
148: return finished;
149: }
150:
151: /**
152: * <P>
153: *
154: * Terminate the current thread
155: *
156: */
157: public void terminate() {
158: terminated = true;
159: interrupt();
160: }
161:
162: /**
163: * <P>
164: *
165: * Check whether the current thread is terminated
166: *
167: *@return None
168: */
169: public boolean isTerminated() {
170: return terminated;
171: }
172:
173: /**
174: * <P>
175: *
176: * Fetch the contents from the specified URL. Populates the "content" buffer
177: *
178: */
179: public void run() {
180: filecache.log("Fetcher.run()");
181: if (isTerminated()) {
182: if (filecache.debug)
183: filecache
184: .log("Fetcher.run() isTerminated returned true");
185: return;
186: }
187: // Set the proxy information
188:
189: String useProxy = props.getProperty("Fetcher.useProxy");
190: if ((useProxy != null) && !(useProxy.length() < 1)
191: && !(useProxy.equals("none"))) {
192: Properties systemProps = System.getProperties();
193: if (useProxy.equals("proxy")) {
194: systemProps.put("http.proxyHost", props
195: .getProperty("Fetcher.proxyHost"));
196: systemProps.put("http.proxyPort", props
197: .getProperty("Fetcher.proxyPort"));
198: }
199: if (useProxy.equals("socks")) {
200: systemProps.put("http.socksProxyHost", props
201: .getProperty("Fetcher.socksProxyHost"));
202: systemProps.put("http.socksProxyPort", props
203: .getProperty("Fetcher.socksProxyPort"));
204: }
205: System.setProperties(systemProps);
206: }
207: InputStream in = null;
208: String charset = null;
209: try {
210: HttpURLConnection uc = null;
211: try {
212: uc = (HttpURLConnection) url.openConnection();
213: } catch (ClassCastException ce) {
214: filecache.log(
215: "FileCache doesn't support ftp, file urls. URL:"
216: + url, ce);
217: throw new Exception(
218: "FileCache doesn't support ftp, file urls. URL:"
219: + url, ce);
220: }
221: if (uc == null) {
222: filecache
223: .log("Fetcher.run(): URLConnection object is null");
224: throw new Exception(
225: "Fetcher.run(): URLConnection object is null");
226: }
227: uc.setDoInput(true);
228: uc.setUseCaches(false);
229:
230: //
231: // Begin forward cookies
232: //
233: if (request != null) {
234: if (filecache.debug)
235: filecache.log("Fetcher.run(); request is not null");
236: Cookie[] ca = request.getCookies();
237: Vector forwardedCookies = null;
238: if (ca != null) {
239: forwardedCookies = new Vector(ca.length);
240: for (int j = 0; j < ca.length; j++) {
241: if (filecache.forward(ca[j].getName(),
242: allCookies, cookiesToForwardList)) {
243: forwardedCookies.add(ca[j]);
244: if (filecache.debug)
245: filecache.log("Fetcher.run() Added "
246: + ca[j]
247: + "to forwarded cookies list");
248: }
249: }
250: } else if (filecache.debug) {
251: filecache.log("Fetcher.run(); ca was null.");
252: }
253:
254: if ((forwardedCookies != null)
255: && (forwardedCookies.size() > 0)) {
256: StringBuffer cs = new StringBuffer();
257: if (filecache.debug) {
258: filecache.log("cookieTable="
259: + filecache.cookieTable);
260: }
261: for (Iterator fci = forwardedCookies.iterator(); fci
262: .hasNext();) {
263: Cookie c = (Cookie) fci.next();
264: String path = null;
265: String domain = null;
266: if (filecache.cookieTable != null
267: && filecache.cookieTable.containsKey(c
268: .getName())) {
269: // restore the original domain and path.
270: domainPathEntry dp = (domainPathEntry) filecache.cookieTable
271: .get(c.getName());
272: path = dp.getPath();
273: domain = dp.getDomain();
274: }
275: cs.append(c.getName()).append("=").append(
276: URLEncoder.encode(c.getValue()));
277: if (path != null) {
278: cs.append("; ").append("$Path=" + path);
279:
280: }
281: if (domain != null) {
282: cs.append("; ").append("$Domain=" + domain);
283: }
284:
285: if (fci.hasNext()) {
286: cs.append("; ");
287: }
288: }
289: if (filecache.debug) {
290: filecache
291: .log("Fetcher.run(): Forwarded cookie header: "
292: + cs.toString());
293: filecache.log("Fetcher.run(): cookieMap="
294: + filecache.cookieTable);
295: }
296: uc.setRequestProperty("Cookie", cs.toString());
297: }
298: }
299: setHeaders(uc, request);
300: int contentLength = uc.getContentLength();
301: int responseCode = uc.getResponseCode();
302:
303: if (filecache.debug) {
304: filecache.log("Fetcher.run(): got response, code="
305: + responseCode + "contentLength="
306: + contentLength);
307: }
308:
309: if (responseCode == -1) {
310: filecache.log("Fetcher.run(): response code was -1!");
311: return;
312: }
313:
314: if (responseCode == uc.HTTP_MOVED_PERM
315: || responseCode == uc.HTTP_MOVED_TEMP) {
316: //
317: // do redirect
318: //
319: String loc = uc.getHeaderField("Location");
320: Header h = new Header(loc);
321: if (filecache.debug) {
322: filecache
323: .log("Fetcher.run(): got redirect, location="
324: + h.getValue());
325: }
326:
327: //
328: // recursively call run, with new url based on location of redirect.
329: //
330: url = new URL(getAbsURL(h.getValue()));
331: // Process cookies that were set in the response by handing
332: // them back to the browser (if in the cookiesToForward list)
333: String key;
334: for (int i = 1; (key = uc.getHeaderFieldKey(i)) != null; i++) {
335: if (!key.equalsIgnoreCase("Set-cookie")) {
336: continue;
337: }
338: String setCookie = uc.getHeaderField(i);
339: processSetCookieHeader(setCookie);
340: }
341: run();
342: return;
343: }
344:
345: // Process cookies that were set in the response by handing
346: // them back to the browser (if in the cookiesToForward list)
347: String key;
348: for (int i = 1; (key = uc.getHeaderFieldKey(i)) != null; i++) {
349: if (!key.equalsIgnoreCase("Set-cookie")) {
350: continue;
351: }
352: String setCookie = uc.getHeaderField(i);
353: processSetCookieHeader(setCookie);
354: }
355:
356: //
357: // read the content
358: //
359:
360: contentType = uc.getContentType();
361: PageSpec pageSpec = new PageSpec(uc.getURL().toString(),
362: contentType);
363: in = uc.getInputStream();
364: byte[] bytes = filecache.readContent(in, contentLength);
365: charset = filecache.getContentEncoding(contentType, bytes,
366: pageSpec.getMIME());
367: if (charset != null && charset.length() != 0) {
368: data = new String(bytes, charset);
369: } else {
370: data = new String(bytes);
371: }
372:
373: // To check whether rewriter is available. If so
374: // call the necessary API's
375: //Begin rewriter code.
376: RewriterPool rwPool = RewriterPool.getDefault();
377: Rewriter rewriter = null;
378: if (rwPool != null)
379: rewriter = rwPool.getRewriter(rulesetID, pageSpec);
380: if (rewriter != null) {
381: if (filecache.debug)
382: filecache.log("Fetcher.run(): rewriter not null");
383: Translator t = new AbsoluteTranslator(pageSpec);
384: //
385: // found translator, translate content and return.
386: //
387: content = new StringBuffer(rewriter.rewrite(data, t));
388: } else {
389: if (filecache.debug)
390: filecache.log("Fetcher.run(): rewriter is null");
391: content = new StringBuffer(data);
392: }
393:
394: if (filecache.debug) {
395: filecache.log("Fetcher.run(): got content=\n"
396: + content.toString());
397: }
398: finished = true;
399: } catch (InvalidXMLException ixe) {
400: filecache.log("Fetcher.run(): ", ixe);
401: content = new StringBuffer(props
402: .getProperty("invalidruleset"));
403: finished = true;
404: } catch (UnsupportedEncodingException ue) {
405: filecache.log("Fetcher.run():Invalid charset " + charset);
406: filecache.log("Fetcher.run():Unsupported Encoding.", ue);
407: content = new StringBuffer(props
408: .getProperty("unsupportedencoding"));
409: finished = true;
410: } catch (Exception e) {
411: filecache.log("Exception in Fetcher:run()", e);
412: //
413: // set content to null, this tells the desktop to try to get
414: // from the cache if available
415: //
416: content = null;
417: finished = true;
418: } finally {
419: try {
420: if (in != null) {
421: in.close();
422: }
423: } catch (IOException e2) {
424: // nothing
425: }
426: }
427: }
428:
429: /**
430: * Gets the cookieName attribute of the Fetcher object
431: *
432: *@param cookieStr Description of the Parameter
433: *@return The cookieName value
434: */
435: private String getCookieName(String cookieStr) {
436: String name = null;
437: int index = cookieStr.indexOf("=");
438: if (index != -1) {
439: name = cookieStr.substring(0, index);
440: }
441: return name;
442: }
443:
444: /**
445: * Description of the Method
446: *
447: *@param cookieStr Description of the Parameter
448: */
449: private void processSetCookieHeader(String cookieStr) {
450: // change the domain/path information.
451: String newCookieStr = modifyCookieHeader(cookieStr);
452: if (filecache.debug) {
453: filecache
454: .log("Fetcher.processSetCookieHeader(): Original set-cookie="
455: + cookieStr
456: + "Modified set-cookie="
457: + newCookieStr);
458: }
459: if (newCookieStr != null) {
460: cookieStr = newCookieStr;
461: }
462: try {
463: if (cookieStr != null && response != null) {
464: String cookieName = getCookieName(cookieStr);
465: if ((cookieName != null)
466: && (filecache.forward(cookieName, allCookies,
467: cookiesToForwardList))) {
468: // include it in the response (after rewriting)
469: response.addHeader("Set-Cookie", cookieStr);
470: }
471: }
472: } catch (Exception e) {
473: filecache.log(
474: "Fetcher.processSetCookieHeader(): invalid cookie: "
475: + cookieStr, e);
476: }
477: }
478:
479: /**
480: * Gets the absURL attribute of the Fetcher object
481: *
482: *@param u Description of the Parameter
483: *@return The absURL value
484: */
485: private String getAbsURL(String u) throws MalformedURLException {
486:
487: if (!u.regionMatches(true, 0, "http://", 0, 7)
488: && !u.regionMatches(true, 0, "https://", 0, 8)) {
489: String scheme = requestURL.getProtocol();
490: StringBuffer absURL = new StringBuffer().append(scheme)
491: .append("://").append(requestURL.getHost()).append(
492: ":");
493: int port = requestURL.getPort();
494: // default port schemes need to fill in port
495: if (scheme.equals("http") && port <= 0) {
496: port = 80;
497: } else if (scheme.equals("https") && port <= 0) {
498: port = 443;
499: }
500:
501: absURL.append(port).append(u);
502: return absURL.toString();
503: } else {
504: return u;
505: }
506: }
507:
508: /**
509: * Description of the Method
510: *
511: *@param cookieStr Description of the Parameter
512: *@return Description of the Return Value
513: */
514: private String modifyCookieHeader(String cookieStr) {
515: if (cookieStr == null) {
516: return null;
517: }
518:
519: StringTokenizer tokens = new StringTokenizer(cookieStr, ";");
520:
521: if (!tokens.hasMoreTokens()) {
522: return cookieStr;
523: }
524:
525: String nameAndValue = tokens.nextToken().trim();
526: String cookieName = getCookieName(cookieStr);
527: String host = url.getHost();
528: String portalHost = requestURL.getHost();
529:
530: int equIndex;
531:
532: StringBuffer sb = new StringBuffer();
533: sb.append(nameAndValue);
534:
535: String token;
536:
537: String attr;
538:
539: String val;
540: String path = null;
541: String domain = null;
542:
543: while (tokens.hasMoreTokens()) {
544: token = tokens.nextToken().trim();
545: if (token.length() == 0) {
546: continue;
547: }
548:
549: equIndex = token.indexOf('=');
550:
551: if (equIndex < 0) {
552: attr = token;
553: val = null;
554: } else {
555: attr = token.substring(0, equIndex).trim();
556: if (equIndex + 1 == token.length()) {
557: val = null;
558: } else {
559: val = token.substring(equIndex + 1).trim();
560: }
561: }
562:
563: if (attr.equalsIgnoreCase("path")) {
564: path = val;
565: if (path != null) {
566: if (!path.startsWith("/")) {
567: return null;
568: }
569: }
570: } else if (attr.equalsIgnoreCase("domain")) {
571: domain = val;
572: if (domain != null) {
573: // must start with '.' and at least .a.b
574: if (domain.charAt(0) != '.' || domain.length() < 4) {
575: return null;
576: }
577: if (domain.charAt(domain.length() - 1) == '.') {
578: return null;
579: }
580:
581: if (!host.toLowerCase().endsWith(
582: domain.toLowerCase())) {
583: return null;
584: }
585: }
586: } else {
587: sb.append(";" + token);
588: }
589: }
590: // store the domain and path in cookieMap.
591: String portalDomain = getDomain(portalHost);
592: if (domain != null && path != null) {
593: if (!domain.equalsIgnoreCase(portalDomain)
594: || !path.equalsIgnoreCase(getPath())) {
595: filecache.cookieTable.put(cookieName,
596: new domainPathEntry(domain, path));
597: }
598: } else {
599: filecache.cookieTable.put(cookieName, new domainPathEntry(
600: domain, path));
601: }
602: // rewrite the domain to portal domain and path to root.
603: sb.append(";domain=").append(portalDomain).append(";path=/");
604:
605: return sb.toString();
606: }
607:
608: /**
609: * Gets the path attribute of the Fetcher object
610: *
611: *@return The path value
612: */
613: private String getPath() {
614: return (request.getContextPath() + request.getServletPath());
615: }
616:
617: /**
618: * Gets the domain attribute of the Fetcher object
619: *
620: *@param host Description of the Parameter
621: *@return The domain value
622: */
623: private String getDomain(String host) {
624:
625: int index1 = host.lastIndexOf('.');
626: if (index1 == -1) {
627: return host;
628: }
629: int index2 = host.substring(0, index1).lastIndexOf('.');
630: if (index1 == -1 || index2 == -1) {
631: return host;
632: }
633: return host.substring(index2);
634: }
635:
636: /**
637: * pass all headers to the URL like userAgent etc. Helps in scraping sites
638: * which are client-aware
639: *
640: *@param uc The new headers value
641: *@param request The new headers value
642: *@params uc the URLConnection object where the header is set
643: *@params request the source where the header is copied from
644: */
645: protected void setHeaders(URLConnection uc,
646: HttpServletRequest request) {
647: Enumeration names = request.getHeaderNames();
648:
649: if (names != null) {
650: while (names.hasMoreElements()) {
651: String name = (String) names.nextElement();
652: if (name != null) {
653: String nameLC = name.toLowerCase();
654: String value = request.getHeader(name);
655: if (filecache.debug) {
656: filecache.log("Fetcher.setHeader(): header:"
657: + name + " = " + value);
658: }
659:
660: if (ignoreHeaders.contains(nameLC)) {
661: continue;
662: // dont pass specific Request headers
663: }
664:
665: if (value != null) {
666: uc.setRequestProperty(name, value);
667:
668: if (filecache.debug) {
669: filecache
670: .log("FileCache's Fetcher.setHeader(): added header: "
671: + name + " = " + value);
672: }
673: }
674: }
675: }
676: }
677: return;
678: }
679:
680: /**
681: * Gets the contentType attribute of the Fetcher object
682: *
683: *@return The contentType value
684: */
685: String getContentType() {
686: return contentType;
687: }
688:
689: /**
690: * Description of the Class
691: *
692: *@author Administrator
693: *@created February 23, 2004
694: */
695: static class domainPathEntry {
696: String domain = null;
697: String path = null;
698:
699: /**
700: * Constructor for the domainPathEntry object
701: *
702: *@param d Description of the Parameter
703: *@param p Description of the Parameter
704: */
705: domainPathEntry(String d, String p) {
706: domain = d;
707: path = p;
708: }
709:
710: /**
711: * Gets the path attribute of the domainPathEntry object
712: *
713: *@return The path value
714: */
715: String getPath() {
716: return path;
717: }
718:
719: /**
720: * Gets the domain attribute of the domainPathEntry object
721: *
722: *@return The domain value
723: */
724: String getDomain() {
725: return domain;
726: }
727: }
728:
729: }
|