001: /*
002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005: package com.sun.portal.providers.urlscraper;
006:
007: import java.util.List;
008: import java.util.ResourceBundle;
009: import java.util.StringTokenizer;
010: import java.util.Hashtable;
011: import java.util.Map;
012: import java.util.HashMap;
013: import java.util.Iterator;
014: import java.util.logging.Level;
015: import java.util.logging.Logger;
016: import java.util.logging.LogRecord;
017:
018: import java.net.MalformedURLException;
019: import java.net.FileNameMap;
020: import java.net.URLConnection;
021: import java.net.URL;
022:
023: import java.io.File;
024: import java.io.IOException;
025: import java.io.FileNotFoundException;
026: import java.io.FileInputStream;
027:
028: import java.io.InputStream;
029: import java.io.UnsupportedEncodingException;
030:
031: import javax.servlet.http.HttpServletRequest;
032: import javax.servlet.http.HttpServletResponse;
033:
034: import com.sun.portal.providers.ProfileProviderAdapter;
035: import com.sun.portal.providers.ProviderException;
036: import com.sun.portal.providers.context.ProviderContextException;
037: import com.sun.portal.desktop.util.I18n;
038: import com.sun.portal.desktop.RequestThreadLocalizer;
039: import com.sun.portal.desktop.ubt.DesktopEvents;
040: import com.sun.portal.ubt.UBTLogManager;
041: import com.sun.portal.ubt.UBTEvent;
042: import com.sun.portal.log.common.PortalLogger;
043: import com.sun.portal.providers.util.ProviderProperties;
044:
045: import com.sun.identity.security.DecryptAction;
046: import com.sun.identity.security.EncryptAction;
047: import java.security.AccessController;
048:
049: /**
050: * <P> A URLScraperProvider is a content provider that can retrieve and display
051: * content from a given URL.
052: *
053: * <P> URLScraperProvider acts as an HTTP client and makes a request
054: * for the content of the specified URL and then displays it in the
055: * channel.
056: *
057: * <P> Each URLScraper channel has its own timeout attribute. The
058: * channel will wait up to its individual timeout to receive
059: * content.
060: *
061: * <P><B>Forwarding of cookies</B><BR>
* Each URLScraper channel has a <CODE>cookiesToForwardList</CODE> attribute
* that can be set in the display profile. If
* a cookie is allowed by this attribute, a cookie in the request
* coming from the browser will be forwarded to the web server specified
* for the URL. The <CODE>cookiesToForwardAll</CODE> attribute can be set to true to allow
* all the cookies. A <CODE>set-cookie</CODE> request from that web server
068: * will be sent back to the browser. The <CODE>set-cookie</CODE> request
069: * is modified so that the cookie is only sent back to the portal server.
070: *
071: * <P><B>URL Rewriting</B><BR>
* The content gathered by the channel will be rewritten if
* the rewriter is available. The ruleset used by the rewriter can be
* specified in the display profile attribute <i>urlScraperRulesetID</i>.
* Relative URLs are converted to absolute URLs. For example, if your portal server is
* <CODE>http://portal.iplanet.com/</CODE> and the web server specified in the
* URL is <CODE>http://foo.sesta.com/</CODE> and the file contains
* <BR><BR>
* <CODE>&lt;IMG SRC="/images/blah.gif"&gt;</CODE>
* <BR><BR>
* then the content sent back to the browser via the portal server will be
* rewritten as:
* <BR><BR>
* <CODE>&lt;IMG SRC="http://foo.sesta.com/images/blah.gif"&gt;</CODE>
* <BR><BR>
* Otherwise the browser would attempt to read the image from
* <CODE>http://portal.iplanet.com/images/blah.gif</CODE> and would not resolve it.
088: *
089: * <P><B>SSL protected pages</B><BR>
090: * In general the URLScraperProvider will work with SSL pages. The
091: * important thing to remember is that there can be no level of
092: * interaction required by the specified URL as there is no way to
093: * pass that information to the end user.
094: * <UL>
* <LI>The certificate on the target server needs to be valid (in other words,
* signed by a recognized CA). If you browse to the specified URL
* and get a warning dialog about an expired or unrecognized certificate,
* the URLScraperProvider will not be able to access the site because
* it cannot respond to the dialogs.
100: * <LI>There may be difficulties with sites that require 128-bit SSL.
101: * </UL>
102: *
103: * <P><B>Timeouts</B><BR>
104: * There are 2 timeout values to consider:
105: * <UL>
106: * <LI>URLScraper timeout (specific to the channel)
107: * <LI>RefreshTime (attribute for all channels)
108: * </UL>
109: * Each URLScraper channel has its own timeout attribute. The
110: * channel will wait up to its individual timeout to receive
111: * content.
112: *
113: * <P><B>Encoding</B><BR>
114: * The order for determining the encoding would be
115: * HTTP header, if available (only applies to http(s) urls)<BR>
116: * inputEncoding property, if non-blank<BR>
117: * tag in content, e.g. meta tag in html & wml, xml header for xml, if available
118: * (only applies to HTML, XML,WML determined based on the MIMEType)<BR>
119: * system default <BR>
120: * MIMEType is determined from the jvm table. If not set, it is determined
121: * from the file extension.<BR>
122: *
123: * <P><B>Proxy Configuration</B><BR>
* The URLScraper channel uses a proxy to scrape the specified url
* if the proxy is set in the jvm12.conf file for the web server.
* For example, the proxy can be set as <BR>
* <CODE>http.proxyHost=&lt;proxyHost&gt;</CODE><BR>
* <CODE>http.proxyPort=&lt;proxyPort&gt;</CODE>
129: *
130: * <P>The <CODE>refreshTime</CODE> attribute is used for caching and
131: * will cause the URL not to be fetched again if the page is reloaded
132: * within that time.
133: *
134: **/
135:
136: public class URLScraperProvider extends ProfileProviderAdapter
137: implements ProviderProperties {
138:
139: private static Logger logger = PortalLogger
140: .getLogger(URLScraperProvider.class);
141:
142: private ResourceBundle bundle = null;
143: private List pflist = null;
144: private Fetcher httpFetcher = null;
145:
146: /**
147: * Array of File extensions mapped to the MIMETypes
148: */
149: protected static String typeTable[][] = { { ".html", "text/html" },
150: { ".htm", "text/html" }, { ".gif", "image/gif" },
151: { ".txt", "text/plain" }, { ".jpg", "image/jpeg" },
152: { ".xml", "text/xml" }, { ".wml", "text/vnd.wap.wml" } };
153:
/**
 * Creates a URLScraperProvider. The constructor itself sets up no state.
 */
public URLScraperProvider() {
    super();
}
159:
160: /**
161: * Gets the timeout property for the provider.
162: *
163: * @return timeout value
164: * @exception ProviderException if there is an error getting the timeout
165: * property.
166: * @see com.sun.portal.providers.ProviderException
167: */
168: protected int getTimeout() throws ProviderException {
169: return getIntegerProperty("timeout");
170: }
171:
172: /**
173: * <P> Gets the url property for the provider. This is the URL from where
174: * the contents are fetched
175: *
176: * @return URL value
177: * @exception ProviderException if there is an error getting the URL
178: * property.
179: * @see com.sun.portal.providers.ProviderException
180: */
181: protected String getURL() throws ProviderException {
182: return getPropertyValue("url");
183: }
184:
185: /**
186: * <P> Gets the urlScraperRulesetID to be used by rewriter.
187: * @return String value
188: * @exception ProviderException if there is an error getting the
189: * urlScrapperRulesetID.
190: * @see com.sun.portal.providers.ProviderException
191: */
192: protected String getRuleSetID() throws ProviderException {
193: return getPropertyValue("urlScraperRulesetID");
194: }
195:
196: /**
197: * <P> Gets the inputEncoding to be used by content.
198: * This method returns the inputEncoding which would be
199: * used in encoding the scraped content.
200: * @return String value
201: * @exception ProviderException if there is an error getting the
202: * input encoding.
203: * @see com.sun.portal.providers.ProviderException
204: */
205: public String getInputEncoding() throws ProviderException {
206: return getPropertyValue("inputEncoding");
207: }
208:
209: private String getPropertyValue(String key)
210: throws ProviderException {
211: String val = null;
212: try {
213: // Fix for 6317585. remove caching
214: // Create pflist when request != null
215:
216: if ((RequestThreadLocalizer.getRequest() != null)
217: || (pflist == null))
218: pflist = getProviderContext()
219: .getClientAndLocalePropertiesFilters();
220: val = getStringProperty(key, pflist);
221: } catch (ProviderContextException pce) {
222: logger.log(Level.INFO, "PSCR_CSPPU0014", pce);
223: }
224: if (val == null) {
225: val = getStringProperty(key);
226: }
227: return val;
228: }
229:
230: /** Determines presentability for channels based on this provider.
231: * This overrides the base class's implementation to returns true for all device
232: * @param request the HttpServletRequest
233: * @return boolean true for all devices
234: **/
235: public boolean isPresentable(HttpServletRequest request) {
236: return true;
237: }
238:
239: /**
240: * <P>Get the provider's content by retrieving content from specified
241: * URL.
242: *
243: * This method internally calls <code>getHttpContent</code> when the url
244: * returned from <code>getURL()</code> is a http or https url.
245: *
246: * This method wraps certain exceptions thrown, into an error message to
247: * display as the channel content.
248: *
249: * @param req An HttpServletRequest that contains information related
250: * to this request for content.
251: * @param res An HttpServletResponse that allows the provider to
252: * influence the overall response for the desktop page
253: * (besides generating the content).
254: * @return Channel content
255: * @exception ProviderException if there was an error generating the
256: * content.
257: * @see com.sun.portal.providers.ProviderException
258: * @see #getHttpContent
259: * @see #getURL
260: */
261: public StringBuffer getContent(HttpServletRequest req,
262: HttpServletResponse res) throws ProviderException {
263: StringBuffer content = new StringBuffer();
264:
265: bundle = getResourceBundle();
266:
267: String url = getURL();
268: if (url == null || url.trim().length() == 0) {
269: content.append(bundle.getString("urlnotspecified"));
270: return content;
271: }
272: String proto = null;
273: try {
274: proto = url.substring(0, url.indexOf(':'));
275: } catch (IndexOutOfBoundsException iobe) {
276: }
277:
278: if (proto != null && proto.equalsIgnoreCase("file")) {
279: String PathName = null;
280: try {
281: PathName = url.substring(url.indexOf('/'));
282: } catch (IndexOutOfBoundsException iobe) {
283: logger.log(Level.INFO, "PSCR_CSPPU0015", iobe);
284: content = content.append(bundle
285: .getString("unsupportedurl"));
286: return content;
287: }
288: try {
289: content = getFileAsBuffer(PathName);
290: } catch (UnsupportedEncodingException ue) {
291: logger.log(Level.INFO, "PSCR_CSPPU0016", ue);
292: content = content.append(bundle
293: .getString("unsupportedencoding"));
294: } catch (IOException ioe) {
295: logger.log(Level.INFO, "PSCR_CSPPU0017", ioe);
296: return null;
297: }
298:
299: return content;
300: }
301:
302: try {
303:
304: boolean enable = false;
305: try {
306: enable = UBTLogManager.getInstance().isUBTEnabled()
307: && this .getBooleanProperty("enableUBT", false)
308: && Logger
309: .getLogger(
310: UBTEvent
311: .getInstance(
312: DesktopEvents.USER_CLICK_EX_LINK)
313: .getLoggerName())
314: .isLoggable(
315: UBTEvent
316: .getInstance(
317: DesktopEvents.USER_CLICK_EX_LINK)
318: .getLevel());
319: } catch (Exception e) {
320: logger.log(Level.INFO, "PSCR_CSPPU0025", e);
321: }
322: content = enable ? getHttpContent(req, res, url, true)
323: : getHttpContent(req, res, url, false);
324: } catch (InterruptedException ie) {
325: logger.log(Level.INFO, "PSCR_CSPPU0018", ie);
326: content = content.append(bundle.getString("timeout"));
327: return content;
328:
329: } catch (MalformedURLException mue) {
330: logger.log(Level.INFO, "PSCR_CSPPU0019", mue);
331: content = content
332: .append(bundle.getString("unsupportedurl"));
333: return content;
334: }
335: return content;
336: }
337:
338: /**
339: * <P>Get the provider's content by retrieving content from the specified
340: * http or https URL.
341: *
342: * <P>This method does not handle file URLs. It only handles http or https urls.
343: * The content scraped from the specified url is rewritten if a rewriter is
344: * available using the ruleset returned by <code>getRuleSetID()</code>
345: *
346: * <P>This method throws exceptions for certain exceptional conditions instead
347: * of returning an error message in the returned <code>StringBuffer</code>
348: *
349: * @param req An HttpServletRequest that contains information related
350: * to this request for content.
351: * @param res An HttpServletResponse that allows the provider to
352: * influence the overall response for the desktop page
353: * (besides generating the content).
354: * @param url http or https url string
355: * @return Scraped content
356: * @exception InterruptedException if there is a timeout while
357: * trying to get the scraped content
358: * @exception MalformedURLException if the url passed in is not a valid
359: * http or https url.
360: * @exception ProviderException if there was an error generating the
361: * content
362: * @see com.sun.portal.providers.ProviderException
363: * @see #getRuleSetID
364: */
365: protected StringBuffer getHttpContent(HttpServletRequest req,
366: HttpServletResponse res, String url)
367: throws InterruptedException, MalformedURLException,
368: ProviderException {
369: return getHttpContent(req, res, url, false);
370: }
371:
372: /**
373: * <P>Get the provider's content by retrieving content from the specified
374: * http or https URL.
375: *
376: * <P>This method does not handle file URLs. It only handles http or https urls.
377: * The content scraped from the specified url is rewritten if a rewriter is
378: * available using the ruleset returned by <code>getRuleSetID()</code>
379: *
380: * <P>This method throws exceptions for certain exceptional conditions instead
381: * of returning an error message in the returned <code>StringBuffer</code>
382: *
383: * @param req An HttpServletRequest that contains information related
384: * to this request for content.
385: * @param res An HttpServletResponse that allows the provider to
386: * influence the overall response for the desktop page
387: * (besides generating the content).
388: * @param url http or https url string
389: * @param ubt Indicates whether to track links external to portal
390: * @return Scraped content
391: * @exception InterruptedException if there is a timeout while
392: * trying to get the scraped content
393: * @exception MalformedURLException if the url passed in is not a valid
394: * http or https url.
395: * @exception ProviderException if there was an error generating the
396: * content
397: * @see com.sun.portal.providers.ProviderException
398: * @see #getRuleSetID
399: */
400:
401: protected StringBuffer getHttpContent(HttpServletRequest req,
402: HttpServletResponse res, String url, boolean ubt)
403: throws InterruptedException, MalformedURLException,
404: ProviderException {
405: StringBuffer content = new StringBuffer();
406:
407: //
408: // fetch the content
409: //
410: if (httpFetcher == null) {
411: Map config = new HashMap();
412: //Add the properties that might have been added by customer like uid, password
413: try {
414: Iterator it = getProviderContext().getNames(
415: this .getName());
416: while (it.hasNext()) {
417: String propertyName = (String) it.next();
418: config.put(propertyName, getProviderContext()
419: .getProperty(this .getName(), propertyName));
420: }
421: } catch (ProviderContextException pcEx) {
422: logger.log(Level.INFO, "PSCR_CSPPU0027", pcEx);
423: }
424: config.put(Fetcher.KEY_RULESET_ID, getRuleSetID());
425: config.put(Fetcher.KEY_COOKIES_TO_FORWARD_ALL, new Boolean(
426: getCookiesToForwardAll()));
427: config.put(Fetcher.KEY_COOKIES_TO_FORWARD_LIST,
428: getcookiesToForwardList());
429: config.put(Fetcher.KEY_RESOURCE_BUNDLE, bundle);
430: config.put(Fetcher.KEY_INPUT_ENCODING, getInputEncoding());
431: try {
432: //overwrite the Password with plain text password
433: config.put(Fetcher.KEY_HTTP_AUTH_PASSWORD,
434: getHttpAuthPassword());
435: //these following entries are already added or may not be present in derived providers like XML Channel.
436: //If the name of these entries are same in fetcher and in the DP , it is already taken care of
437: //config.put(Fetcher.KEY_FORM_DATA,getFormData() );
438: //config.put(Fetcher.KEY_LOGIN_FORM_DATA,getLoginFormData() );
439: //config.put(Fetcher.KEY_LOGIN_URL,getLoginUrl() );
440: //config.put(Fetcher.KEY_HTTP_AUTH,new Boolean(isHttpAuth() ));
441: //config.put(Fetcher.KEY_HTTP_AUTH_UID,getHttpAuthUid() );
442: //config.put(Fetcher.KEY_LOGOUT_URL,getLogoutUrl() );
443: } catch (ProviderException pEx) {
444: }
445:
446: String ubtAppendUrl = getProviderContext().getDesktopURL(
447: req)
448: + "?action=ubt&url=";
449:
450: httpFetcher = ubt ? new Fetcher(config, ubt, ubtAppendUrl)
451: : new Fetcher(config);
452:
453: }
454:
455: try {
456: //
457: // wait for fetcher to get content
458: //
459: content = httpFetcher.getFilteredContent(getTimeout(), req,
460: res, url);
461:
462: } finally {
463:
464: }
465: return content;
466: }
467:
468: /**
469: * This method is called by <code>getContent()</code> if the url
470: * returned by <code>getURL()</code> is a file url.
471: *
472: * @param pathName Fully qualified path name of file
473: * @return File Object specified by the pathName or null
474: * if the file does not exists or cannot be read.
475: */
476: protected File getFile(String pathname) {
477:
478: File returnFile = null;
479:
480: try {
481: returnFile = new File(pathname);
482:
483: if ((returnFile.exists()) && (returnFile.isFile())
484: && (returnFile.canRead())) {
485: return returnFile;
486: } else {
487: return null;
488: }
489: } catch (SecurityException se) {
490: logger.log(Level.INFO, "PSCR_CSPPU0021", se);
491: return null;
492: } catch (NullPointerException npe) {
493: logger.log(Level.INFO, "PSCR_CSPPU0022", npe);
494: return null;
495: }
496: }
497:
498: /**
499: * Gets the specified file as StringBuffer
500: *
501: * @param path specifying fully qualified pathname of the file.
502: * @return StringBuffer containing the data from the specified file
503: * or null if file does not exist or cannot be read.
504: * @exception IOException
505: * @exception ProviderException if there is an error getting the file
506: * as StringBuffer.
507: * @see com.sun.portal.providers.ProviderException
508: */
509: protected StringBuffer getFileAsBuffer(String pathName)
510: throws IOException, ProviderException {
511:
512: StringBuffer result = null;
513: File f = null;
514: try {
515: f = getFile(pathName);
516: if (f != null) {
517: FileInputStream fin = new FileInputStream(f);
518: byte[] bytes = Fetcher.readContent(fin, -1);
519: String MIMEType = getMIMEType(f.getName());
520: String charset = Fetcher.getContentEncoding(null,
521: bytes, MIMEType, getInputEncoding());
522: if (fin != null) {
523: fin.close();
524: }
525: if (charset != null && charset.length() != 0) {
526: result = new StringBuffer(
527: new String(bytes, charset));
528: } else {
529: result = new StringBuffer(new String(bytes));
530: }
531: ContentFilter conFilter = ContentFilterImpl
532: .getInstance(MIMEType);
533: result = conFilter.filter(result);
534: } else {
535: logger.log(Level.INFO, "PSCR_CSPPU0023");
536: return null;
537: }
538:
539: } catch (NegativeArraySizeException nas) {
540: logger.log(Level.INFO, "PSCR_CSPPU0024", nas);
541: return null;
542: } catch (FileNotFoundException fe) {
543: logger.log(Level.INFO, "PSCR_CSPPU0024", fe);
544: return null;
545: }
546: return result;
547: }
548:
549: private static String getMIMEType(String filename) {
550: String MIMEType = null;
551: if (filename != null) {
552: try {
553: FileNameMap fnm = URLConnection.getFileNameMap();
554: if (fnm.getContentTypeFor(filename) != null) {
555: MIMEType = fnm.getContentTypeFor(filename);
556: }
557: } catch (NullPointerException npe) {
558: } // happens if URLConnection.fileNameMap is not set
559: if (MIMEType == null) {
560: String fname = filename.toLowerCase();
561: for (int i = 0; i < typeTable.length
562: && MIMEType == null; i++) {
563: if (fname.endsWith(typeTable[i][0]))
564: MIMEType = typeTable[i][1];
565: }
566: }
567: }
568: return MIMEType;
569: }
570:
571: protected boolean getCookiesToForwardAll() throws ProviderException {
572: return getBooleanProperty("cookiesToForwardAll");
573: }
574:
575: protected List getcookiesToForwardList() throws ProviderException {
576: return getListProperty("cookiesToForwardList");
577: }
578:
579: protected boolean isHttpAuth() throws ProviderException {
580: return getBooleanProperty("isHttpAuth");
581:
582: }
583:
584: protected String getHttpAuthUid() throws ProviderException {
585: return getStringProperty("uid");
586: }
587:
588: protected String getHttpAuthPassword() throws ProviderException {
589: String password = getStringProperty("password");
590: if (password == null)
591: return "";
592: password = (String) AccessController
593: .doPrivileged(new DecryptAction(password));
594: return password;
595: }
596:
597: protected String getLoginUrl() throws ProviderException {
598: return getStringProperty("loginUrl");
599: }
600:
601: protected String getLogoutUrl() throws ProviderException {
602: return getStringProperty("logoutUrl");
603: }
604:
605: protected String getLoginFormData() throws ProviderException {
606: return getStringProperty("loginFormData");
607: }
608:
609: protected String getFormData() throws ProviderException {
610: return getStringProperty("formData");
611: }
612:
613: public StringBuffer getEdit(HttpServletRequest req,
614: HttpServletResponse res) throws ProviderException {
615:
616: StringBuffer content = new StringBuffer();
617: Hashtable tagTable = new Hashtable();
618: tagTable.put("fontFace1", getStringProperty("fontFace1"));
619:
620: //When loginUrl is not specfied and isHttpAuth is false, the channel does not require authentication.
621: if (!isHttpAuth()) {
622: String loginUrl = getLoginUrl();
623: if (loginUrl == null || loginUrl.equals("")) {
624: content = getTemplate("editError.template", tagTable);
625: return content;
626: }
627: }
628:
629: //tagTable.put("fontFace1", getStringProperty( "fontFace1" , clientAndLocaleFilters));
630: tagTable.put("Name", getName());
631: tagTable.put("uid", this .getHttpAuthUid());
632: //Do not retrun the password , just return 8 Stars
633: tagTable.put("password", "********");
634: content = getTemplate(EDIT_TEMPLATE, tagTable);
635:
636: return content;
637: }
638:
639: public URL processEdit(HttpServletRequest req,
640: HttpServletResponse res) throws ProviderException {
641:
642: String uid = req.getParameter("uid");
643: String password = req.getParameter("password");
644:
645: try {
646: if (uid != null) {
647: getProviderContext().setStringProperty(getName(),
648: "uid", uid);
649: }
650: if (password != null && (!password.equals(""))) {
651: //Only if password is not 8 stars , set it, else ignore it
652: if (!password.equals("********")) {
653: password = (String) AccessController
654: .doPrivileged(new EncryptAction(password));
655: getProviderContext().setStringProperty(getName(),
656: "password", password);
657: }
658: }
659:
660: } catch (ProviderContextException pce) {
661: if (logger.isLoggable(Level.WARNING)) {
662: //TBD change the id of LOG RECORD
663: LogRecord rec = new LogRecord(Level.WARNING,
664: "PSDT_CSPPB0001");
665: rec.setLoggerName(logger.getName());
666: String[] param = { "uid", "password" };
667: rec.setParameters(param);
668: rec.setThrown(pce);
669: logger.log(rec);
670: }
671: }
672: //Reset the httpFetcher
673: //httpFetcher.logout(0,req,res);
674: this.httpFetcher = null;
675: getProviderContext().contentChanged(getName());
676:
677: return null;
678: }
679: }
|