001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/util/CmsHtmlTagRemoveFactory.java,v $
003: * Date : $Date: 2008-02-27 12:05:36 $
004: * Version: $Revision: 1.7 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.util;
033:
034: import org.opencms.main.CmsLog;
035:
036: import java.util.Set;
037: import java.util.TreeSet;
038: import java.util.Vector;
039:
040: import org.apache.commons.logging.Log;
041:
042: import org.htmlparser.Attribute;
043: import org.htmlparser.Node;
044: import org.htmlparser.NodeFilter;
045: import org.htmlparser.PrototypicalNodeFactory;
046: import org.htmlparser.Tag;
047: import org.htmlparser.lexer.Page;
048: import org.htmlparser.scanners.Scanner;
049: import org.htmlparser.util.NodeList;
050: import org.htmlparser.util.ParserException;
051: import org.htmlparser.util.SimpleNodeIterator;
052: import org.htmlparser.visitors.NodeVisitor;
053:
054: /**
055: *
056: * A tag factory for htmlparser that is able to "remove tags".<p>
057: *
058: * Create an instance, add the {@link org.htmlparser.Tag} instances to remove and assign this
059: * factory to the {@link org.htmlparser.Parser} before starting a visit. A demo usage is shown in
060: * <code>CmsTagReplaceParser</code>.<p>
061: *
062: * The tags are not actually removed: They are linked in the document object model tree of the HTML
063: * that the parser generates. They just will not accept any {@link NodeVisitor} instances and
064: * therefore be invisible in any output a visitor will generate from the visited tree.<p>
065: *
066: * The decision whether a tag is removed can be controlled in two ways:
067: * <ol>
068: * <li>
069: * <code>{@link #addTagRemoval(Tag)}</code><br/>
070: * <p>
071: * The given tag will be removed ("invisible in the DOM").
072: * </p>
073: * </li>
074: * <li>
075: * <code>{@link #addTagPreserve(Tag)}</code><br/>
076: * <p>
077: * The given tag will be kept as-is. The following behaviour happens if this method is used:
078: * <ol>
079: * <li>
080: * Once <code>{@link #addTagPreserve(Tag)}</code> has been called all Tags that are not added
081: * to this method will be removed. <strong>We are in include mode then</strong>.
082: * </li>
083: * <li>
084: * The Tags provided to <code>{@link #addTagRemoval(Tag)}</code> will only have the
085: * power to hide exactly the same tags that are given to <code>{@link #addTagPreserve(Tag)}</code>:
086: * <strong>Deny is stronger than allow.</strong>
087: * </li>
088: * </ol>
089: * </p>
090: * </li>
091: * </ol>
092: *
093: * @author Achim Westermann
094: *
095: * @version $Revision: 1.7 $
096: *
097: * @since 6.1.8
098: */
099: public final class CmsHtmlTagRemoveFactory extends
100: PrototypicalNodeFactory {
101:
102: /**
103: * A Tag implementation that will not accept any {@link NodeVisitor} stopping by.<p>
104: *
105: * When visiting the corresponding tree of tags, this tag will be there but the visitor will not
106: * see it as it is not accepted. This allows "elimination" of this tag in the output the visitor
107: * generates from the document object model (e.g. HTML code again).<p>
108: *
109: * Potential child tags will be visible to visitors (unless they are instances of this class).<p>
110: *
111: * @author Achim Westermann
112: *
113: * @version $Revision: 1.7 $
114: *
115: * @since 6.1.8
116: */
117: private static final class CmsInvisibleTag implements Tag {
118:
119: /** Generated serial version UID. */
120: private static final long serialVersionUID = -3397880117291165819L;
121:
122: /** The real underlying tag. */
123: private Tag m_decorated;
124:
125: /**
126: * Constructor with the delegate to wrap.
127: * <p>
128: *
129: * Every property is accessed transparently from the delegate, except that visitors are not
130: * welcome.
131: * <p>
132: *
133: * @param delegate the tag to hide.
134: */
135: CmsInvisibleTag(Tag delegate) {
136:
137: m_decorated = delegate;
138: }
139:
140: /**
141: * @see org.htmlparser.Tag#accept(org.htmlparser.visitors.NodeVisitor)
142: */
143: public void accept(NodeVisitor visitor) {
144:
145: // be invisible but show the children (if they like visits)
146: NodeList children = m_decorated.getChildren();
147: if (children == null) {
148: return;
149: }
150: SimpleNodeIterator itChildren = children.elements();
151: while (itChildren.hasMoreNodes()) {
152: itChildren.nextNode().accept(visitor);
153: }
154: }
155:
156: /**
157: * @see org.htmlparser.Tag#breaksFlow()
158: */
159: public boolean breaksFlow() {
160:
161: return m_decorated.breaksFlow();
162: }
163:
164: /**
165: * @see org.htmlparser.Node#clone()
166: */
167: public Object clone() throws CloneNotSupportedException {
168:
169: return m_decorated.clone();
170: }
171:
172: /**
173: * @see org.htmlparser.Node#collectInto(org.htmlparser.util.NodeList,
174: * org.htmlparser.NodeFilter)
175: */
176: public void collectInto(NodeList arg0, NodeFilter arg1) {
177:
178: m_decorated.collectInto(arg0, arg1);
179: }
180:
181: /**
182: * @see org.htmlparser.Node#doSemanticAction()
183: */
184: public void doSemanticAction() throws ParserException {
185:
186: m_decorated.doSemanticAction();
187: }
188:
189: /**
190: * @see org.htmlparser.Tag#getAttribute(java.lang.String)
191: */
192: public String getAttribute(String arg0) {
193:
194: return m_decorated.getAttribute(arg0);
195: }
196:
197: /**
198: * @see org.htmlparser.Tag#getAttributeEx(java.lang.String)
199: */
200: public Attribute getAttributeEx(String arg0) {
201:
202: return m_decorated.getAttributeEx(arg0);
203: }
204:
205: /**
206: * @see org.htmlparser.Tag#getAttributesEx()
207: */
208: public Vector getAttributesEx() {
209:
210: return m_decorated.getAttributesEx();
211: }
212:
213: /**
214: * @see org.htmlparser.Node#getChildren()
215: */
216: public NodeList getChildren() {
217:
218: return m_decorated.getChildren();
219: }
220:
221: /**
222: * @see org.htmlparser.Tag#getEnders()
223: */
224: public String[] getEnders() {
225:
226: return m_decorated.getEnders();
227: }
228:
229: /**
230: * @see org.htmlparser.Tag#getEndingLineNumber()
231: */
232: public int getEndingLineNumber() {
233:
234: return m_decorated.getEndingLineNumber();
235: }
236:
237: /**
238: * @see org.htmlparser.Node#getEndPosition()
239: */
240: public int getEndPosition() {
241:
242: return m_decorated.getEndPosition();
243: }
244:
245: /**
246: * @see org.htmlparser.Tag#getEndTag()
247: */
248: public Tag getEndTag() {
249:
250: return m_decorated.getEndTag();
251: }
252:
253: /**
254: * @see org.htmlparser.Tag#getEndTagEnders()
255: */
256: public String[] getEndTagEnders() {
257:
258: return m_decorated.getEndTagEnders();
259: }
260:
261: /**
262: * @see org.htmlparser.Node#getFirstChild()
263: */
264: public Node getFirstChild() {
265:
266: return m_decorated.getFirstChild();
267: }
268:
269: /**
270: * @see org.htmlparser.Tag#getIds()
271: */
272: public String[] getIds() {
273:
274: return m_decorated.getIds();
275: }
276:
277: /**
278: * @see org.htmlparser.Node#getLastChild()
279: */
280: public Node getLastChild() {
281:
282: return m_decorated.getLastChild();
283: }
284:
285: /**
286: * @see org.htmlparser.Node#getNextSibling()
287: */
288: public Node getNextSibling() {
289:
290: return m_decorated.getNextSibling();
291: }
292:
293: /**
294: * @see org.htmlparser.Node#getPage()
295: */
296: public Page getPage() {
297:
298: return m_decorated.getPage();
299: }
300:
301: /**
302: * @see org.htmlparser.Node#getParent()
303: */
304: public Node getParent() {
305:
306: return m_decorated.getParent();
307: }
308:
309: /**
310: * @see org.htmlparser.Node#getPreviousSibling()
311: */
312: public Node getPreviousSibling() {
313:
314: return m_decorated.getPreviousSibling();
315: }
316:
317: /**
318: * @see org.htmlparser.Tag#getRawTagName()
319: */
320: public String getRawTagName() {
321:
322: return m_decorated.getRawTagName();
323: }
324:
325: /**
326: * @see org.htmlparser.Tag#getStartingLineNumber()
327: */
328: public int getStartingLineNumber() {
329:
330: return m_decorated.getStartingLineNumber();
331: }
332:
333: /**
334: * @see org.htmlparser.Node#getStartPosition()
335: */
336: public int getStartPosition() {
337:
338: return m_decorated.getStartPosition();
339: }
340:
341: /**
342: * @see org.htmlparser.Tag#getTagName()
343: */
344: public String getTagName() {
345:
346: return m_decorated.getTagName();
347: }
348:
349: /**
350: * @see org.htmlparser.Node#getText()
351: */
352: public String getText() {
353:
354: return m_decorated.getText();
355: }
356:
357: /**
358: * @see org.htmlparser.Tag#getThisScanner()
359: */
360: public Scanner getThisScanner() {
361:
362: return m_decorated.getThisScanner();
363: }
364:
365: /**
366: * @see org.htmlparser.Tag#isEmptyXmlTag()
367: */
368: public boolean isEmptyXmlTag() {
369:
370: return m_decorated.isEmptyXmlTag();
371: }
372:
373: /**
374: * @see org.htmlparser.Tag#isEndTag()
375: */
376: public boolean isEndTag() {
377:
378: return m_decorated.isEndTag();
379: }
380:
381: /**
382: * @see org.htmlparser.Tag#removeAttribute(java.lang.String)
383: */
384: public void removeAttribute(String arg0) {
385:
386: m_decorated.removeAttribute(arg0);
387: }
388:
389: /**
390: * @see org.htmlparser.Tag#setAttribute(java.lang.String, java.lang.String)
391: */
392: public void setAttribute(String arg0, String arg1) {
393:
394: m_decorated.setAttribute(arg0, arg1);
395: }
396:
397: /**
398: * @see org.htmlparser.Tag#setAttribute(java.lang.String, java.lang.String, char)
399: */
400: public void setAttribute(String arg0, String arg1, char arg2) {
401:
402: m_decorated.setAttribute(arg0, arg1, arg2);
403: }
404:
405: /**
406: * @see org.htmlparser.Tag#setAttributeEx(org.htmlparser.Attribute)
407: */
408: public void setAttributeEx(Attribute arg0) {
409:
410: m_decorated.setAttributeEx(arg0);
411: }
412:
413: /**
414: * @see org.htmlparser.Tag#setAttributesEx(java.util.Vector)
415: */
416: public void setAttributesEx(Vector arg0) {
417:
418: m_decorated.setAttributesEx(arg0);
419: }
420:
421: /**
422: * @see org.htmlparser.Node#setChildren(org.htmlparser.util.NodeList)
423: */
424: public void setChildren(NodeList arg0) {
425:
426: m_decorated.setChildren(arg0);
427: }
428:
429: /**
430: * @see org.htmlparser.Tag#setEmptyXmlTag(boolean)
431: */
432: public void setEmptyXmlTag(boolean arg0) {
433:
434: m_decorated.setEmptyXmlTag(arg0);
435: }
436:
437: /**
438: * @see org.htmlparser.Node#setEndPosition(int)
439: */
440: public void setEndPosition(int arg0) {
441:
442: m_decorated.setEndPosition(arg0);
443: }
444:
445: /**
446: * @see org.htmlparser.Tag#setEndTag(org.htmlparser.Tag)
447: */
448: public void setEndTag(Tag arg0) {
449:
450: m_decorated.setEndTag(arg0);
451: }
452:
453: /**
454: * @see org.htmlparser.Node#setPage(org.htmlparser.lexer.Page)
455: */
456: public void setPage(Page arg0) {
457:
458: m_decorated.setPage(arg0);
459: }
460:
461: /**
462: * @see org.htmlparser.Node#setParent(org.htmlparser.Node)
463: */
464: public void setParent(Node arg0) {
465:
466: m_decorated.setParent(arg0);
467: }
468:
469: /**
470: * @see org.htmlparser.Node#setStartPosition(int)
471: */
472: public void setStartPosition(int arg0) {
473:
474: m_decorated.setStartPosition(arg0);
475: }
476:
477: /**
478: * @see org.htmlparser.Tag#setTagName(java.lang.String)
479: */
480: public void setTagName(String arg0) {
481:
482: m_decorated.setTagName(arg0);
483: }
484:
485: /**
486: * @see org.htmlparser.Node#setText(java.lang.String)
487: */
488: public void setText(String arg0) {
489:
490: m_decorated.setText(arg0);
491: }
492:
493: /**
494: * @see org.htmlparser.Tag#setThisScanner(org.htmlparser.scanners.Scanner)
495: */
496: public void setThisScanner(Scanner arg0) {
497:
498: m_decorated.setThisScanner(arg0);
499: }
500:
501: /**
502: * @see org.htmlparser.Node#toHtml()
503: */
504: public String toHtml() {
505:
506: return m_decorated.toHtml();
507: }
508:
509: /**
510: * @see org.htmlparser.Node#toHtml(boolean)
511: */
512: public String toHtml(boolean value) {
513:
514: return m_decorated.toHtml(value);
515: }
516:
517: /**
518: * @see org.htmlparser.Node#toPlainTextString()
519: */
520: public String toPlainTextString() {
521:
522: return m_decorated.toPlainTextString();
523: }
524:
525: /**
526: * @see org.htmlparser.Node#toString()
527: */
528: public String toString() {
529:
530: return m_decorated.toString();
531: }
532: }
533:
534: /** The log object for this class. */
535: private static final Log LOG = CmsLog
536: .getLog(CmsHtmlTagRemoveFactory.class);
537:
538: /** Generated serial version UID. */
539: private static final long serialVersionUID = 6961158563666656633L;
540:
541: /** The tags to hide tothe node visitors. */
542: private Set m_invisibleTags;
543:
544: /** The tags to show to the node visitors. */
545: private Set m_visibleTags;
546:
/**
 * Creates a new factory with empty sets of tag names to preserve and to remove.<p>
 *
 * The no-argument constructor of the super class is used (according to the original
 * documentation of this constructor this leaves all tags registered).<p>
 */
public CmsHtmlTagRemoveFactory() {

    // the super class default constructor is invoked implicitly
    m_visibleTags = new TreeSet();
    m_invisibleTags = new TreeSet();
}
558:
559: /**
560: * Add a tag that will be visible for {@link NodeVisitor} instances.
561: * <p>
562: *
563: * Not only "this" tag will be visible but all parsed Tags that have the same name (case
564: * insensitive).
565: * <p>
566: *
567: * The given tag will be kept as-is. The following behaviour happens if this method is used:
568: * <ol>
569: * <li>
570: * Once <code>{@link #addTagPreserve(Tag)}</code> has been called all Tags that are not added
571: * to this method will be removed. <strong>We are in include mode then</strong>.
572: * </li>
573: * <li>
574: * The Tags provided to <code>{@link #addTagRemoval(Tag)}</code> will only have the
575: * power to hide exactly the same tags that are given to <code>{@link #addTagPreserve(Tag)}</code>:
576: * <strong>Deny is stronger than allow.</strong>
577: * </li>
578: * </ol>
579: * <p>
580: *
581: *
582: * @param tag the tag that will be visible for all {@link NodeVisitor} instances.
583: *
584: * @return true if the tag was added to the internal set of tags to keep, false if not (was
585: * contained before, has no name,...).
586: */
587: public boolean addTagPreserve(final Tag tag) {
588:
589: boolean result = false;
590: String tagName = tag.getTagName();
591: if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(tagName)) {
592: result = m_visibleTags.add(tagName.toLowerCase());
593: }
594: return result;
595:
596: }
597:
598: /**
599: * Add a tag that will be invisible for {@link NodeVisitor} instances.
600: * <p>
601: *
602: * Not only "this" tag will be invisible but all parsed Tags that have the same name (case
603: * insensitive).
604: * <p>
605: *
606: * @param tag the tag that will be visible for all {@link NodeVisitor} instances.
607: *
608: * @return true if the tag was added to the internal set of tags to remove, false if not (was
609: * contained before, has no name,...).
610: */
611: public boolean addTagRemoval(final Tag tag) {
612:
613: boolean result = false;
614: String tagName = tag.getTagName();
615: if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(tagName)) {
616: result = m_invisibleTags.add(tagName.toLowerCase());
617: }
618: return result;
619: }
620:
621: /**
622: * @see org.htmlparser.PrototypicalNodeFactory#createTagNode(org.htmlparser.lexer.Page, int,
623: * int, java.util.Vector)
624: */
625: public Tag createTagNode(Page arg0, int arg1, int arg2, Vector arg3) {
626:
627: try {
628: String tagName = ((Attribute) arg3.get(0)).getName()
629: .toLowerCase();
630: // end tags have names like "/a"....
631: if (tagName.charAt(0) == '/') {
632: tagName = tagName.substring(1);
633: }
634: Tag result = super .createTagNode(arg0, arg1, arg2, arg3);
635: if (!keepTag(tagName)) {
636: result = new CmsInvisibleTag(result);
637: }
638: return result;
639: } catch (RuntimeException rte) {
640: if (LOG.isErrorEnabled()) {
641: // log here, as htmlparser 1.5 did swallow exceptions from here and threw NPEs from
642: // other places
643: LOG.error(rte);
644: }
645: throw rte;
646: }
647: }
648:
649: /**
650: * Encapsulation of the "preserve / remove" logic.<p>
651: *
652: * @param tagName the lower case name of the tag to keep or hide
653: *
654: * @return if true the given Tag will be kept, if false it will be removed
655: */
656: private boolean keepTag(final String tagName) {
657:
658: boolean result = false;
659: // include mode:
660: if (m_visibleTags.size() > 0) {
661: if (m_visibleTags.contains(tagName)) {
662: result = true;
663: } else {
664: result = false;
665: }
666: }
667: // Power of hide: if no visible tags configured this works as a normal remove,
668: // if visible tags are configured this can change a visible tag to be invisible
669: if (m_invisibleTags.contains(tagName)) {
670: result = false;
671: }
672:
673: return result;
674: }
675: }
|