001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.jetspeed.portlet;
018:
019: import java.io.BufferedInputStream;
020: import java.io.ByteArrayInputStream;
021: import java.io.ByteArrayOutputStream;
022: import java.io.FileReader;
023: import java.io.IOException;
024: import java.io.InputStream;
025: import java.io.InputStreamReader;
026: import java.io.OutputStreamWriter;
027: import java.io.PrintWriter;
028: import java.io.Reader;
029: import java.io.StringWriter;
030: import java.io.UnsupportedEncodingException;
031: import java.io.Writer;
032: import java.net.URL;
033: import java.util.ArrayList;
034: import java.util.Arrays;
035: import java.util.HashMap;
036: import java.util.Iterator;
037: import java.util.Map;
038: import java.util.StringTokenizer;
039:
040: import javax.portlet.ActionRequest;
041: import javax.portlet.ActionResponse;
042: import javax.portlet.PortletConfig;
043: import javax.portlet.PortletContext;
044: import javax.portlet.PortletException;
045: import javax.portlet.PortletMode;
046: import javax.portlet.PortletURL;
047: import javax.portlet.RenderRequest;
048: import javax.portlet.RenderResponse;
049:
050: import org.apache.commons.httpclient.Cookie;
051: import org.apache.commons.httpclient.Header;
052: import org.apache.commons.httpclient.HttpClient;
053: import org.apache.commons.httpclient.HttpMethod;
054: import org.apache.commons.httpclient.HttpMethodBase;
055: import org.apache.commons.httpclient.NameValuePair;
056: import org.apache.commons.httpclient.methods.GetMethod;
057: import org.apache.commons.httpclient.methods.MultipartPostMethod;
058: import org.apache.commons.httpclient.methods.PostMethod;
059: import org.apache.commons.logging.Log;
060: import org.apache.commons.logging.LogFactory;
061: import org.apache.jetspeed.portlet.webcontent.WebContentHistoryList;
062: import org.apache.jetspeed.portlet.webcontent.WebContentHistoryPage;
063: import org.apache.jetspeed.rewriter.JetspeedRewriterController;
064: import org.apache.jetspeed.rewriter.RewriterController;
065: import org.apache.jetspeed.rewriter.RewriterException;
066: import org.apache.jetspeed.rewriter.RulesetRewriter;
067: import org.apache.jetspeed.rewriter.WebContentRewriter;
068: import org.apache.jetspeed.rewriter.html.neko.NekoParserAdaptor;
069: import org.apache.jetspeed.rewriter.rules.Ruleset;
070: import org.apache.jetspeed.rewriter.xml.SaxParserAdaptor;
071: import org.apache.portals.bridges.velocity.GenericVelocityPortlet;
072: import org.apache.portals.messaging.PortletMessaging;
073:
/**
 * WebContentPortlet
 *
 * TODO: Preferences, cache stream instead of URL
 *
 * @author <a href="mailto:rogerrutr@apache.org">Roger Ruttimann </a>
 * @version $Id: WebContentPortlet.java 605431 2007-12-19 05:11:40Z taylor $
 */
082:
083: public class WebContentPortlet extends GenericVelocityPortlet {
084:
/*
 * WebContentPortlet allows navigation inside the portlet and caches the
 * latest URL.
 */

/*
 * Configuration constants.
 */

/** Parameter name "viewSource" (not referenced elsewhere in this class). */
public static final String VIEW_SOURCE_PARAM = "viewSource";
/** Parameter name "editSource" (not referenced elsewhere in this class). */
public static final String EDIT_SOURCE_PARAM = "editSource";

// ...browser action buttons

/** Action parameter that carries one of the BROWSER_ACTION_* values below. */
public static final String BROWSER_ACTION_PARAM = "wcBrowserAction";
/** Value of BROWSER_ACTION_PARAM for the "Previous Page" link. */
public static final String BROWSER_ACTION_PREVIOUS_PAGE = "previousPage";
/** Value of BROWSER_ACTION_PARAM for the "Refresh Page" link. */
public static final String BROWSER_ACTION_REFRESH_PAGE = "refreshPage";
/** Value of BROWSER_ACTION_PARAM for the "Next Page" link. */
public static final String BROWSER_ACTION_NEXT_PAGE = "nextPage";

// WebContent session data

/** PortletMessaging key under which the WebContentHistoryList is stored. */
public static final String HISTORY = "webcontent.history";
/** PortletMessaging key under which the HttpClient cookies are stored. */
public static final String HTTP_STATE = "webcontent.http.state";

// Class Data

/** Logger shared by all instances of this portlet. */
protected final static Log log = LogFactory
        .getLog(WebContentPortlet.class);
/** Character encoding used when no other encoding is known. */
public final static String defaultEncoding = "UTF-8";

// Data Members

/** Rewriter created by getController(); null until then. */
private RulesetRewriter rewriter = null;
/** Rewriter controller, created on first use in doView(); null until then. */
private RewriterController rewriteController = null;

/** Form method name selecting an HTTP POST in getHttpMethod(). */
public static final String FORM_POST_METHOD = "post";
/** Form method name selecting an HTTP GET in getHttpMethod(). */
public static final String FORM_GET_METHOD = "get";
/** Form method name selecting a multipart POST in getHttpMethod(). */
public static final String FORM_MULTIPART_METHOD = "multipart";
124:
125: public WebContentPortlet() {
126: super ();
127: }
128:
129: /**
130: * Initialize portlet configuration.
131: */
132: public void init(PortletConfig config) throws PortletException {
133: super .init(config);
134: }
135:
136: /**
137: * processAction() Checks action initiated by the WebContent portlet which
138: * means that a user has clicked on an URL
139: *
140: * @param actionRequest
141: * @param actionResponse
142: * @throws PortletException
143: * @throws IOException
144: */
145: public void processAction(ActionRequest actionRequest,
146: ActionResponse actionResponse) throws PortletException,
147: IOException {
148: // check to see if it is a meta-navigation command
149: String browserAction = actionRequest
150: .getParameter(BROWSER_ACTION_PARAM);
151: if (browserAction != null) {
152: if (!browserAction
153: .equalsIgnoreCase(BROWSER_ACTION_REFRESH_PAGE)) {
154: // for Refresh, there is nothing special to do - current history page will be re-displayed
155: WebContentHistoryList history = (WebContentHistoryList) PortletMessaging
156: .receive(actionRequest, HISTORY);
157:
158: if (browserAction
159: .equalsIgnoreCase(BROWSER_ACTION_PREVIOUS_PAGE)) {
160: if (history.hasPreviousPage())
161: history.getPreviousPage();
162: } else if (browserAction
163: .equalsIgnoreCase(BROWSER_ACTION_NEXT_PAGE)) {
164: if (history.hasNextPage())
165: history.getNextPage();
166: }
167: }
168:
169: return; // proceed to doView() with adjusted history
170: }
171:
172: // Check if an action parameter was defined
173: String webContentURL = actionRequest
174: .getParameter(WebContentRewriter.ACTION_PARAMETER_URL);
175: String webContentMethod = actionRequest
176: .getParameter(WebContentRewriter.ACTION_PARAMETER_METHOD);
177: Map webContentParams = new HashMap(actionRequest
178: .getParameterMap());
179:
180: // defaults
181: if (webContentMethod == null)
182: webContentMethod = ""; // default to GET
183:
184: // parameter map includes the URL (as ACTION_PARAMETER_URL), but all actual params as well
185: webContentParams
186: .remove(WebContentRewriter.ACTION_PARAMETER_URL);
187: webContentParams
188: .remove(WebContentRewriter.ACTION_PARAMETER_METHOD);
189:
190: if (webContentURL == null
191: || actionRequest.getPortletMode() == PortletMode.EDIT) {
192: processPreferencesAction(actionRequest, actionResponse);
193: webContentURL = actionRequest.getPreferences().getValue(
194: "SRC", "http://portals.apache.org");
195:
196: // parameters are for the EDIT mode form, and should not be propagated to the subsequent GET in doView
197: webContentParams.clear();
198: }
199:
200: /*
201: * If the webContentParameter is not empty attach the URL to the session
202: */
203: if (webContentURL != null && webContentURL.length() > 0) {
204: // new page visit - make it the current page in the history
205: WebContentHistoryList history = (WebContentHistoryList) PortletMessaging
206: .receive(actionRequest, HISTORY);
207: if (history == null)
208: history = new WebContentHistoryList();
209: history.visitPage(new WebContentHistoryPage(webContentURL,
210: webContentParams, webContentMethod));
211: PortletMessaging.publish(actionRequest, HISTORY, history);
212: }
213: }
214:
215: /**
216: * doView Renders the URL in the following order 1) SESSION_PARAMETER
217: * 2)cached version 3) defined for preference SRC
218: */
219: public void doView(RenderRequest request, RenderResponse response)
220: throws PortletException, IOException {
221: String viewPage = (String) request
222: .getAttribute(PARAM_VIEW_PAGE);
223: if (viewPage != null) {
224: super .doView(request, response);
225: return;
226: }
227:
228: // view the current page in the history
229: WebContentHistoryList history = (WebContentHistoryList) PortletMessaging
230: .receive(request, HISTORY);
231: if (history == null)
232: history = new WebContentHistoryList();
233: WebContentHistoryPage currentPage = history.getCurrentPage();
234: if (currentPage == null) {
235: String sourceURL = request.getPreferences().getValue("SRC",
236: "");
237: if (sourceURL == null) {
238: // BOZO - switch to edit mode automatically here, instead of throwing exception!
239: throw new PortletException(
240: "WebContent source not specified. Go to edit mode and specify an URL.");
241: }
242: currentPage = new WebContentHistoryPage(sourceURL);
243: }
244:
245: // Initialize the controller if it's not already done
246: if (rewriteController == null) {
247: PortletContext portletApplication = getPortletContext();
248: String path = portletApplication.getRealPath("/WEB-INF");
249: String contextPath = path + "/";
250: try {
251: // Create rewriter adaptor
252: rewriteController = getController(contextPath);
253: } catch (Exception e) {
254: // Failed to create rewriter controller
255: String msg = "WebContentPortlet failed to create rewriter controller.";
256: log.error(msg, e);
257: throw new PortletException(e.getMessage());
258: }
259: }
260:
261: // get content from current page
262: response.setContentType("text/html");
263: byte[] content = doWebContent(currentPage.getUrl(), currentPage
264: .getParams(), currentPage.isPost(), request, response);
265: // System.out.println("Rewritten content is\n..."+new String(content));
266:
267: // write the meta-control navigation header
268: PrintWriter writer = response.getWriter();
269: writer.print("<block>");
270: if (history.hasPreviousPage()) {
271: PortletURL prevAction = response.createActionURL();
272: prevAction.setParameter(BROWSER_ACTION_PARAM,
273: BROWSER_ACTION_PREVIOUS_PAGE);
274: writer.print(" [<a href=\"" + prevAction.toString()
275: + "\">Previous Page</a>] ");
276: }
277: PortletURL refreshAction = response.createActionURL();
278: refreshAction.setParameter(BROWSER_ACTION_PARAM,
279: BROWSER_ACTION_REFRESH_PAGE);
280: writer.print(" [<a href=\"" + refreshAction.toString()
281: + "\">Refresh Page</a>] ");
282: if (history.hasNextPage()) {
283: PortletURL nextAction = response.createActionURL();
284: nextAction.setParameter(BROWSER_ACTION_PARAM,
285: BROWSER_ACTION_NEXT_PAGE);
286: writer.print(" [<a href=\"" + nextAction.toString()
287: + "\">Next Page</a>] ");
288: }
289: writer.print("</block><hr/>");
290:
291: // drain the stream to the portlet window
292: ByteArrayInputStream bais = new ByteArrayInputStream(content);
293: drain(new InputStreamReader(bais,
294: WebContentPortlet.defaultEncoding), writer);
295: bais.close();
296:
297: // done, cache results in the history and save the history
298: history.visitPage(currentPage);
299: PortletMessaging.publish(request, HISTORY, history);
300: }
301:
302: public void doEdit(RenderRequest request, RenderResponse response)
303: throws PortletException, IOException {
304: response.setContentType("text/html");
305: doPreferencesEdit(request, response);
306: }
307:
/*
 * Private helpers for generating WebContent
 */
311: protected byte[] doWebContent(String sourceAttr, Map sourceParams,
312: boolean isPost, RenderRequest request,
313: RenderResponse response) throws PortletException {
314: HttpMethod httpMethod = null;
315:
316: try {
317: // Set the action and base URLs in the rewriter
318: PortletURL action = response.createActionURL();
319: ((WebContentRewriter) rewriter).setActionURL(action);
320: URL baseURL = new URL(sourceAttr);
321: rewriter.setBaseUrl(baseURL.toString());
322:
323: // ...file URLs may be used for testing
324: if (baseURL.getProtocol().equals("file")) {
325: Reader reader = new InputStreamReader(
326: (InputStream) baseURL.getContent());
327: StringWriter writer = new StringWriter();
328: rewriter.rewrite(rewriteController
329: .createParserAdaptor("text/html"), reader,
330: writer);
331: writer.flush();
332: return writer.toString().getBytes();
333: }
334: // else fall through to normal case (http/https)...
335:
336: // ...set up URL and HttpClient stuff
337: HttpClient httpClient = getHttpClient(request);
338: String method = (isPost) ? FORM_POST_METHOD
339: : FORM_GET_METHOD;
340: httpMethod = getHttpMethod(httpClient, getURLSource(
341: sourceAttr, sourceParams, request, response),
342: sourceParams, method, request);
343: byte[] result = doPreemptiveAuthentication(httpClient,
344: httpMethod, request, response);
345:
346: // ...get, cache, and return the content
347: if (result == null) {
348: return doHttpWebContent(httpClient, httpMethod, 0,
349: request, response);
350: } else {
351: return result;
352: }
353: } catch (PortletException pex) {
354: // already reported
355: throw pex;
356: } catch (Exception ex) {
357: String msg = "Exception while rewritting HTML content";
358: log.error(msg, ex);
359: throw new PortletException(msg + ", Error: "
360: + ex.getMessage());
361: } finally {
362: // release the http connection
363: if (httpMethod != null)
364: httpMethod.releaseConnection();
365: }
366: }
367:
368: protected byte[] doHttpWebContent(HttpClient httpClient,
369: HttpMethod httpMethod, int retryCount,
370: RenderRequest request, RenderResponse response)
371: throws PortletException {
372: try {
373: // Get the input stream from the provided httpClient/httpMethod
374: // System.out.println("WebContentPortlet.doHttpWebContent() - from path: "+httpMethod.getPath());
375:
376: // ...set up URL and HttpClient stuff
377: httpClient.executeMethod(httpMethod);
378:
379: // ...reset base URL with fully resolved path (e.g. if a directory, path will end with a /, which it may not have in the call to this method)
380: rewriter.setBaseUrl(rewriter.getBaseRelativeUrl(httpMethod
381: .getPath()));
382: // System.out.println("...reset base URL from final path: "+httpMethod.getPath());
383:
384: // ...save updated state
385: Cookie[] cookies = httpClient.getState().getCookies();
386: PortletMessaging.publish(request, HTTP_STATE, cookies);
387: // System.out.println("...saving: "+(cookies != null ? cookies.length : 0)+", cookies...");
388: // for(int i=0,limit = cookies != null ? cookies.length : 0; i<limit; i++) System.out.println("...cookie["+i+"] is: "+cookies[i]);
389:
390: // ...check for manual redirects
391: int responseCode = httpMethod.getStatusCode();
392: if (responseCode >= 300 && responseCode <= 399) {
393: // redirection that could not be handled automatically!!! (probably from a POST)
394: Header locationHeader = httpMethod
395: .getResponseHeader("location");
396: String redirectLocation = locationHeader != null ? locationHeader
397: .getValue()
398: : null;
399: if (redirectLocation != null) {
400: // System.out.println("WebContentPortlet.doHttpWebContent() >>>handling redirect to: "+redirectLocation+"<<<");
401:
402: // one more time (assume most params are already encoded & new URL is using GET protocol!)
403: return doWebContent(redirectLocation,
404: new HashMap(), false, request, response);
405: } else {
406: // The response is a redirect, but did not provide the new location for the resource.
407: throw new PortletException("Redirection code: "
408: + responseCode
409: + ", but with no redirectionLocation set.");
410: }
411: } else if (responseCode >= 400) {
412: if (responseCode == 401) {
413: if (httpMethod.getHostAuthState().isAuthRequested()
414: && retryCount++ < 1
415: && doRequestedAuthentication(httpClient,
416: httpMethod, request, response)) {
417: // try again, now that we are authorizied
418: return doHttpWebContent(httpClient, httpMethod,
419: retryCount, request, response);
420: } else {
421: // could not authorize
422: throw new PortletException(
423: "Site requested authorization, but we are unable to provide credentials");
424: }
425: } else if (retryCount++ < 3) {
426: log
427: .info("WebContentPortlet.doHttpWebContent() - retrying: "
428: + httpMethod.getPath()
429: + ", response code: "
430: + responseCode);
431:
432: // retry
433: return doHttpWebContent(httpClient, httpMethod,
434: retryCount, request, response);
435: } else {
436: // bad
437: throw new PortletException("Failure reading: "
438: + httpMethod.getPath()
439: + ", response code: " + responseCode);
440: }
441: }
442:
443: // System.out.println("...response code: "+responseCode+", fetching content as stream and rewriting.");
444:
445: // ...ok - *now* create the input stream and reader
446: BufferedInputStream bis = new BufferedInputStream(
447: httpMethod.getResponseBodyAsStream());
448: String encoding = ((HttpMethodBase) httpMethod)
449: .getResponseCharSet();
450: if (encoding == null)
451: encoding = getContentCharSet(bis);
452: Reader htmlReader = new InputStreamReader(bis, encoding);
453:
454: // get the output buffer
455: if (encoding == null)
456: encoding = WebContentPortlet.defaultEncoding;
457: ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
458: Writer htmlWriter = new OutputStreamWriter(
459: byteOutputStream, encoding);
460:
461: // rewrite and flush output
462: rewriter.rewrite(rewriteController
463: .createParserAdaptor("text/html"), htmlReader,
464: htmlWriter);
465: htmlWriter.flush();
466:
467: // Page has been rewritten
468: // TODO: Write it to cache
469: //System.out.println(new String(byteOutputStream.toByteArray()));
470: return byteOutputStream.toByteArray();
471: } catch (UnsupportedEncodingException ueex) {
472: throw new PortletException("Encoding " + defaultEncoding
473: + " not supported. Error: " + ueex.getMessage());
474: } catch (RewriterException rwe) {
475: throw new PortletException(
476: "Failed to rewrite HTML page. Error: "
477: + rwe.getMessage());
478: } catch (Exception e) {
479: throw new PortletException(
480: "Exception while rewritting HTML page. Error: "
481: + e.getMessage());
482: }
483: }
484:
485: protected String getURLSource(String source, Map params,
486: RenderRequest request, RenderResponse response) {
487: return source;
488: }
489:
490: protected byte[] doPreemptiveAuthentication(HttpClient clent,
491: HttpMethod method, RenderRequest request,
492: RenderResponse response) {
493: // derived class responsibilty - return true, if credentials have been set
494: return null;
495: }
496:
497: protected boolean doRequestedAuthentication(HttpClient clent,
498: HttpMethod method, RenderRequest request,
499: RenderResponse response) {
500: // derived class responsibilty - return true, if credentials have been set
501: return false;
502: }
503:
504: /*
505: * Generate a rewrite controller using the basic rules file
506: */
507: private RewriterController getController(String contextPath)
508: throws Exception {
509: Class[] rewriterClasses = new Class[] {
510: WebContentRewriter.class, WebContentRewriter.class };
511:
512: Class[] adaptorClasses = new Class[] { NekoParserAdaptor.class,
513: SaxParserAdaptor.class };
514: RewriterController rwc = new JetspeedRewriterController(
515: contextPath + "conf/rewriter-rules-mapping.xml", Arrays
516: .asList(rewriterClasses), Arrays
517: .asList(adaptorClasses));
518:
519: FileReader reader = new FileReader(contextPath
520: + "conf/default-rewriter-rules.xml");
521:
522: Ruleset ruleset = rwc.loadRuleset(reader);
523: reader.close();
524: rewriter = rwc.createRewriter(ruleset);
525: return rwc;
526: }
527:
528: protected HttpClient getHttpClient(RenderRequest request)
529: throws IOException {
530: // derived class hook (e.g. to set up Basic Authentication)
531: HttpClient client = new HttpClient();
532:
533: // reuse existing state, if we have been here before
534: Cookie[] cookies = (Cookie[]) PortletMessaging.receive(request,
535: HTTP_STATE);
536: if (cookies != null) {
537: // ...so far, just saving cookies - may need a more complex Serializable object here
538: client.getState().addCookies(cookies);
539:
540: // System.out.println("WebContentPortlet.getHttpClient() - reusing: "+cookies.length+", cookies...");
541: // for(int i=0,limit = cookies.length; i<limit; i++) System.out.println("...cookie["+i+"] is: "+cookies[i]);
542: }
543:
544: return client;
545: }
546:
547: protected HttpMethodBase getHttpMethod(HttpClient client,
548: String uri, Map params, String formMethod,
549: RenderRequest request) throws IOException {
550: formMethod = FORM_MULTIPART_METHOD;
551: HttpMethodBase httpMethod = null;
552: String useragentProperty = request.getProperty("User-Agent");
553: if (formMethod.equalsIgnoreCase(FORM_MULTIPART_METHOD)) {
554: // http mutipart
555: MultipartPostMethod mutlitPart = (MultipartPostMethod) (httpMethod = new MultipartPostMethod(
556: uri));
557: if (params != null && !params.isEmpty()) {
558: Iterator iter = params.entrySet().iterator();
559: while (iter.hasNext()) {
560: Map.Entry entry = (Map.Entry) iter.next();
561: String name = (String) entry.getKey();
562: String[] values = (String[]) entry.getValue();
563: if (values != null)
564: for (int i = 0, limit = values.length; i < limit; i++) {
565: // System.out.println("...adding >>>POST parameter: "+name+", with value: "+values[i]+"<<<");
566:
567: mutlitPart.addParameter(name, values[i]);
568: }
569: }
570: }
571:
572: } else if (formMethod.equalsIgnoreCase(FORM_GET_METHOD)) {
573:
574: // System.out.println("WebContentPortlet.getHttpMethod() - HTTP GET from URL: "+uri);
575: // http GET
576: httpMethod = new GetMethod(uri);
577: if (params != null && !params.isEmpty()) {
578: ArrayList pairs = new ArrayList();
579: Iterator iter = params.entrySet().iterator();
580: while (iter.hasNext()) {
581: Map.Entry entry = (Map.Entry) iter.next();
582: String name = (String) entry.getKey();
583: String[] values = (String[]) entry.getValue();
584: if (values != null)
585: for (int i = 0, limit = values.length; i < limit; i++) {
586: // System.out.println("...adding >>>GET parameter: "+name+", with value: "+values[i]+"<<<");
587: pairs
588: .add(new NameValuePair(name,
589: values[i]));
590: }
591: }
592: httpMethod.setQueryString((NameValuePair[]) pairs
593: .toArray(new NameValuePair[pairs.size()]));
594: }
595:
596: // automatically follow redirects (NOTE: not supported in POST - will throw exeception if you ask for it, then sees a redirect!!)
597: httpMethod.setFollowRedirects(true);
598: } else if (formMethod.equalsIgnoreCase(FORM_POST_METHOD)) {
599: // System.out.println("WebContentPortlet.getHttpMethod() - HTTP POST to URL: "+uri);
600:
601: // http POST
602: PostMethod postMethod = (PostMethod) (httpMethod = new PostMethod(
603: uri));
604: if (params != null && !params.isEmpty()) {
605: Iterator iter = params.entrySet().iterator();
606: while (iter.hasNext()) {
607: Map.Entry entry = (Map.Entry) iter.next();
608: String name = (String) entry.getKey();
609: String[] values = (String[]) entry.getValue();
610: if (values != null)
611: for (int i = 0, limit = values.length; i < limit; i++) {
612: // System.out.println("...adding >>>POST parameter: "+name+", with value: "+values[i]+"<<<");
613:
614: postMethod.addParameter(name, values[i]);
615: }
616: }
617: }
618: }
619:
620: // propagate User-Agent, so target site does not think we are a D.O.S. attack
621: httpMethod.addRequestHeader("User-Agent", useragentProperty);
622:
623: // BOZO - DON'T do this. default policy seems to be more flexible!!!
624: //httpMethod.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
625:
626: // ...ready to use!
627: return httpMethod;
628: }
629:
/**
 * Size of the buffers used by drain() and getContentCharSet(); for the
 * latter it is also the number of leading bytes examined.
 */
static final int BLOCK_SIZE = 4096;
631:
632: /*
633: private void drain(InputStream reader, OutputStream writer) throws IOException
634: {
635: byte[] bytes = new byte[BLOCK_SIZE];
636: try
637: {
638: int length = reader.read(bytes);
639: while (length != -1)
640: {
641: if (length != 0)
642: {
643: writer.write(bytes, 0, length);
644: }
645: length = reader.read(bytes);
646: }
647: }
648: finally
649: {
650: bytes = null;
651: }
652: }
653: */
654:
655: private void drain(Reader r, Writer w) throws IOException {
656: char[] bytes = new char[BLOCK_SIZE];
657: try {
658: int length = r.read(bytes);
659: while (length != -1) {
660: if (length != 0) {
661: w.write(bytes, 0, length);
662: }
663: length = r.read(bytes);
664: }
665: } finally {
666: bytes = null;
667: }
668:
669: }
670:
671: /*
672: private void drain(Reader r, OutputStream os) throws IOException
673: {
674: Writer w = new OutputStreamWriter(os);
675: drain(r, w);
676: w.flush();
677: }
678: */
679:
680: private String getContentCharSet(InputStream is) throws IOException {
681: if (!is.markSupported()) {
682: return null;
683: }
684:
685: byte[] buf = new byte[BLOCK_SIZE];
686: try {
687: is.mark(BLOCK_SIZE);
688: is.read(buf, 0, BLOCK_SIZE);
689: String content = new String(buf, "ISO-8859-1");
690: String lowerCaseContent = content.toLowerCase();
691: int startIndex = lowerCaseContent.indexOf("<head");
692: if (startIndex == -1) {
693: startIndex = 0;
694: }
695: int endIndex = lowerCaseContent.indexOf("</head");
696: if (endIndex == -1) {
697: endIndex = content.length();
698: }
699: content = content.substring(startIndex, endIndex);
700:
701: StringTokenizer st = new StringTokenizer(content, "<>");
702: while (st.hasMoreTokens()) {
703: String element = st.nextToken();
704: String lowerCaseElement = element.toLowerCase();
705: if (lowerCaseElement.startsWith("meta")
706: && lowerCaseElement.indexOf("content-type") > 0) {
707: StringTokenizer est = new StringTokenizer(element,
708: " =\"\';");
709: while (est.hasMoreTokens()) {
710: if (est.nextToken().equalsIgnoreCase("charset")) {
711: if (est.hasMoreTokens()) {
712: return est.nextToken();
713: }
714: }
715: }
716: }
717: }
718: } catch (IOException e) {
719: } finally {
720: is.reset();
721: }
722:
723: return null;
724: }
725: }
|