001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.jetspeed.rewriter.html;
018:
019: import java.io.IOException;
020: import java.io.Reader;
021: import java.io.Writer;
022: import java.util.Enumeration;
023:
024: import javax.swing.text.MutableAttributeSet;
025: import javax.swing.text.html.HTML;
026: import javax.swing.text.html.HTMLEditorKit;
027:
028: import org.apache.commons.logging.Log;
029: import org.apache.commons.logging.LogFactory;
030: import org.apache.jetspeed.rewriter.ParserAdaptor;
031: import org.apache.jetspeed.rewriter.Rewriter;
032: import org.apache.jetspeed.rewriter.RewriterException;
033:
034: /**
035: * HTML Parser Adaptor for the Swing 'HotJava' parser.
036: *
037: * @author <a href="mailto:taylor@apache.org">David Sean Taylor</a>
038: * @version $Id: SwingParserAdaptor.java 516448 2007-03-09 16:25:47Z ate $
039: */
040: public class SwingParserAdaptor implements ParserAdaptor {
041: protected final static Log log = LogFactory
042: .getLog(SwingParserAdaptor.class);
043:
044: private SwingParserAdaptor.Callback callback = null;
045: private String lineSeparator;
046: private boolean skippingImplied = false;
047: private Rewriter rewriter;
048:
049: /*
050: * Construct a swing (hot java) parser adaptor
051: * Receives a Rewriter parameter, which is used as a callback when rewriting URLs.
052: * The rewriter object executes the implementation specific URL rewriting.
053: *
054: * @param rewriter The rewriter object that is called back during URL rewriting
055: */
056: public SwingParserAdaptor() {
057: lineSeparator = System.getProperty("line.separator", "\r\n");
058: }
059:
060: /*
061: * Parses and an HTML document, rewriting all URLs as determined by the Rewriter callback
062: *
063: *
064: * @param reader The input stream reader
065: *
066: * @throws MalformedURLException
067: *
068: * @return An HTML-String with rewritten URLs.
069: */
070: public void rewrite(Rewriter rewriter, Reader reader, Writer writer)
071: throws RewriterException {
072: try {
073: this .rewriter = rewriter;
074: HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter()
075: .getParser();
076: callback = new SwingParserAdaptor.Callback(writer);
077: parser.parse(reader, callback, true);
078: } catch (Exception e) {
079: e.printStackTrace();
080: throw new RewriterException(e);
081: }
082: }
083:
084: public void parse(Rewriter rewriter, Reader reader)
085: throws RewriterException {
086: try {
087: this .rewriter = rewriter;
088: HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter()
089: .getParser();
090: callback = new SwingParserAdaptor.Callback(null);
091: parser.parse(reader, callback, true);
092: } catch (Exception e) {
093: e.printStackTrace();
094: throw new RewriterException(e);
095: }
096: }
097:
098: /*
099: * This Class is needed, because getParser is protected and therefore
100: * only accessibly by a subclass
101: */
102: class ParserGetter extends HTMLEditorKit {
103:
104: public HTMLEditorKit.Parser getParser() {
105: return super .getParser();
106: }
107: }
108:
109: /*
110: * Swing Parser Callback from the HTMLEditorKit.
111: * This class handles all SAX-like events during parsing.
112: *
113: */
114: class Callback extends HTMLEditorKit.ParserCallback {
115: // either handling of <FORM> is buggy, or I made some weird mistake ...
116: // ... JDK 1.3 sends double "</form>"-tags on closing <form>
117: private boolean inForm = false;
118: private boolean inScript = false;
119: private boolean strip = false;
120: private boolean simpleTag = false;
121: private String stripTag = null;
122: private Writer writer = null;
123:
124: private Callback(Writer writer) {
125: this .writer = writer;
126: }
127:
128: //
129: // -------------- Hot Java event callbacks... --------------------
130: //
131:
132: /*
133: * Hot Java event callback for text (all data in between tags)
134: *
135: * @param values The array of characters containing the text.
136: */
137: public void handleText(char[] values, int param) {
138: if (strip) {
139: return;
140: }
141: if (values[0] == '>') {
142: return;
143: }
144: if (false == rewriter.enterText(values, param)) {
145: return;
146: }
147:
148: addToResult(values);
149: }
150:
151: private void write(String text) throws IOException {
152: if (writer != null) {
153: writer.write(text);
154: }
155: }
156:
157: /*
158: * Hot Java event callback for handling a simple tag (without begin/end)
159: *
160: * @param tag The HTML tag being handled.
161: * @param attrs The mutable HTML attribute set for the current HTML element.
162: * @param position the position of the tag.
163: *
164: */
165: public void handleSimpleTag(HTML.Tag htmlTag,
166: MutableAttributeSet attrs, int param) {
167: String tag = htmlTag.toString();
168:
169: if (false == rewriter.enterSimpleTagEvent(tag,
170: new SwingAttributes(attrs))) {
171: return;
172: }
173:
174: if (strip) {
175: return;
176: }
177:
178: if (rewriter.shouldStripTag(tag)) {
179: return;
180: }
181:
182: if (rewriter.shouldRemoveTag(tag)) {
183: return;
184: }
185:
186: try {
187: simpleTag = true;
188: appendTagToResult(htmlTag, attrs);
189: write(lineSeparator);
190: /*
191: if (tag.toString().equalsIgnoreCase("param") ||
192: tag.toString().equalsIgnoreCase("object") ||
193: tag.toString().equalsIgnoreCase("embed"))
194: {
195: write(lineSeparator);
196: }
197: */
198: simpleTag = false;
199: String appended = rewriter.exitSimpleTagEvent(tag,
200: new SwingAttributes(attrs));
201: if (null != appended) {
202: write(appended);
203: }
204: } catch (Exception e) {
205: log.error("Simple tag parsing error", e);
206: }
207: }
208:
209: /*
210: * Hot Java event callback for handling a start tag.
211: *
212: * @param tag The HTML tag being handled.
213: * @param attrs The mutable HTML attribute set for the current HTML element.
214: * @param position the position of the tag.
215: *
216: */
217: public void handleStartTag(HTML.Tag htmlTag,
218: MutableAttributeSet attrs, int position) {
219: String tag = htmlTag.toString();
220:
221: if (false == rewriter.enterStartTagEvent(tag,
222: new SwingAttributes(attrs))) {
223: return;
224: }
225:
226: if (strip) {
227: return;
228: }
229:
230: if (rewriter.shouldStripTag(tag)) {
231: stripTag = tag;
232: strip = true;
233: return;
234: }
235:
236: if (rewriter.shouldRemoveTag(tag)) {
237: return;
238: }
239:
240: try {
241: appendTagToResult(htmlTag, attrs);
242: formatLine(htmlTag);
243: String appended = rewriter.exitStartTagEvent(tag,
244: new SwingAttributes(attrs));
245: if (null != appended) {
246: write(appended);
247: }
248: } catch (Exception e) {
249: log.error("Start tag parsing error", e);
250: }
251:
252: }
253:
254: /*
255: * Hot Java event callback for handling an end tag.
256: *
257: * @param tag The HTML tag being handled.
258: * @param position the position of the tag.
259: *
260: */
261: public void handleEndTag(HTML.Tag htmlTag, int position) {
262: String tag = htmlTag.toString();
263: if (false == rewriter.enterEndTagEvent(tag.toString())) {
264: return;
265: }
266:
267: if (strip) {
268: if (tag.equalsIgnoreCase(stripTag)) {
269: strip = false;
270: stripTag = null;
271: }
272: return;
273: }
274:
275: if (rewriter.shouldRemoveTag(tag)) {
276: return;
277: }
278:
279: try {
280: addToResult("</").addToResult(tag).addToResult(">");
281:
282: // formatLine(htmlTag);
283: write(lineSeparator);
284:
285: String appended = rewriter.exitEndTagEvent(tag);
286: if (null != appended) {
287: write(appended);
288: }
289: } catch (Exception e) {
290: log.error("End tag parsing error", e);
291: }
292: }
293:
294: /*
295: * Hot Java event callback for handling errors.
296: *
297: * @param str The error message from Swing.
298: * @param param A parameter passed to handler.
299: *
300: */
301: public void handleError(java.lang.String str, int param) {
302: // System.out.println("Handling error: " + str);
303: }
304:
305: /*
306: * Hot Java event callback for HTML comments.
307: *
308: * @param values The character array of text comments.
309: * @param param A parameter passed to handler.
310: *
311: */
312: public void handleComment(char[] values, int param) {
313: if (strip || rewriter.shouldRemoveComments()) {
314: return;
315: }
316: addToResult("<!-- ").addToResult(values)
317: .addToResult(" -->").addToResult(lineSeparator);
318: }
319:
320: /*
321: * Hot Java event callback for end of line strings.
322: *
323: * @param str The end-of-line string.
324: *
325: */
326: public void handleEndOfLineString(java.lang.String str) {
327: if (strip) {
328: return;
329: }
330:
331: addToResult(lineSeparator);
332: addToResult(str);
333: }
334:
335: /*
336: * Prints new lines to make the output a little easier to read when debugging.
337: *
338: * @param tag The HTML tag being handled.
339: *
340: */
341: private void formatLine(HTML.Tag tag) {
342: try {
343: if (tag.isBlock() || tag.breaksFlow()
344: || tag == HTML.Tag.FRAME
345: || tag == HTML.Tag.FRAMESET
346: || tag == HTML.Tag.SCRIPT) {
347: write(lineSeparator);
348: }
349:
350: } catch (Exception e) {
351: log.error("Format Line tag parsing error", e);
352: }
353:
354: }
355:
356: /*
357: * Used to write tag and attribute objects to the output stream.
358: * Returns a reference to itself so that these calls can be chained.
359: *
360: * @param txt Any text to be written out to stream with toString method.
361: * The object being written should implement its toString method.
362: * @return A handle to the this, the callback, for chaining results.
363: *
364: */
365: private Callback addToResult(Object txt) {
366: // to allow for implementation using Stringbuffer or StringWriter
367: // I don't know yet, which one is better in this case
368: //if (ignoreLevel > 0 ) return this;
369:
370: try {
371: write(txt.toString());
372: } catch (Exception e) {
373: System.err.println("Error parsing:" + e);
374: }
375: return this ;
376: }
377:
378: /*
379: * Used to write all character content to the output stream.
380: * Returns a reference to itself so that these calls can be chained.
381: *
382: * @param txt Any character text to be written out directly to stream.
383: * @return A handle to the this, the callback, for chaining results.
384: *
385: */
386: private Callback addToResult(char[] txt) {
387: //if (ignoreLevel > 0) return this;
388:
389: try {
390: if (writer != null) {
391: writer.write(txt);
392: }
393:
394: } catch (Exception e) { /* ignore */
395: }
396: return this ;
397: }
398:
399: /*
400: * Accessor to the Callback's content-String
401: *
402: * @return Cleaned and rewritten HTML-Content
403: */
404: public void getResult() {
405: try {
406: if (writer != null) {
407: writer.flush();
408: }
409: } catch (Exception e) { /* ignore */
410: }
411:
412: // WARNING: doesn't work, if you remove " " + ... but don't know why
413: //String res = " " + result.toString();
414:
415: // return res;
416: }
417:
418: /*
419: * Flushes the output stream. NOT IMPLEMENTED
420: *
421: */
422: public void flush()
423: throws javax.swing.text.BadLocationException {
424: // nothing to do here ...
425: }
426:
427: /*
428: * Writes output to the final stream for all attributes of a given tag.
429: *
430: * @param tag The HTML tag being output.
431: * @param attrs The mutable HTML attribute set for the current HTML tag.
432: *
433: */
434: private void appendTagToResult(HTML.Tag tag,
435: MutableAttributeSet attrs) {
436: convertURLS(tag, attrs);
437: Enumeration e = attrs.getAttributeNames();
438: addToResult("<").addToResult(tag);
439: while (e.hasMoreElements()) {
440: Object attr = e.nextElement();
441: String value = attrs.getAttribute(attr).toString();
442: addToResult(" ").addToResult(attr).addToResult("=\"")
443: .addToResult(value).addToResult("\"");
444: }
445: if (simpleTag)
446: addToResult("/>");
447: else
448: addToResult(">");
449: }
450:
451: /*
452: * Determines which HTML Tag/Element is being inspected, and calls the
453: * appropriate converter for that context. This method contains all the
454: * logic for determining how tags are rewritten.
455: *
456: * @param tag TAG from the Callback-Interface.
457: * @param attrs The mutable HTML attribute set for the current HTML element.
458: */
459:
460: private void convertURLS(HTML.Tag tag, MutableAttributeSet attrs) {
461: rewriter.enterConvertTagEvent(tag.toString(),
462: new SwingAttributes(attrs));
463:
464: /*
465: if ( removeScript && (tag == HTML.Tag.SCRIPT)) {
466: ignoreLevel ++;
467: */
468: }
469:
470: }
471:
472: }
|