001: /*
002: * Copyright 2002-2008 Andy Clark
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.cyberneko.html.filters;
018:
019: import org.apache.xerces.xni.Augmentations;
020: import org.apache.xerces.xni.QName;
021: import org.apache.xerces.xni.XMLAttributes;
022: import org.apache.xerces.xni.XNIException;
023: import org.cyberneko.html.HTMLEventInfo;
024:
025: /**
026: * This filter performs the identity operation of the original
027: * document event stream generated by the HTML scanner by removing
028: * events that are synthesized by the tag balancer. This operation
029: * is essentially the same as turning off tag-balancing in the
030: * parser. However, this filter is useful when you want the tag
031: * balancer to report "errors" but do not want the synthesized
032: * events in the output.
033: * <p>
034: * <strong>Note:</strong>
035: * This filter requires the augmentations feature to be turned on.
036: * For example:
037: * <pre>
038: * XMLParserConfiguration parser = new HTMLConfiguration();
039: * parser.setFeature("http://cyberneko.org/html/features/augmentations", true);
040: * </pre>
041: * <p>
042: * <strong>Note:</strong>
043: * This isn't <em>exactly</em> the identify transform because the
044: * element and attributes names may have been modified from the
045: * original document. For example, by default, NekoHTML converts
046: * element names to upper-case and attribute names to lower-case.
047: *
048: * @author Andy Clark
049: *
050: * @version $Id: Identity.java,v 1.4 2005/02/14 03:56:54 andyc Exp $
051: */
052: public class Identity extends DefaultFilter {
053:
054: //
055: // Constants
056: //
057:
058: /** Augmentations feature identifier. */
059: protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
060:
061: /** Filters property identifier. */
062: protected static final String FILTERS = "http://cyberneko.org/html/properties/filters";
063:
064: //
065: // XMLDocumentHandler methods
066: //
067:
068: /** Start element. */
069: public void startElement(QName element, XMLAttributes attributes,
070: Augmentations augs) throws XNIException {
071: if (augs == null || !synthesized(augs)) {
072: super .startElement(element, attributes, augs);
073: }
074: } // startElement(QName,XMLAttributes,Augmentations)
075:
076: /** Empty element. */
077: public void emptyElement(QName element, XMLAttributes attributes,
078: Augmentations augs) throws XNIException {
079: if (augs == null || !synthesized(augs)) {
080: super .emptyElement(element, attributes, augs);
081: }
082: } // emptyElement(QName,XMLAttributes,Augmentations)
083:
084: /** End element. */
085: public void endElement(QName element, Augmentations augs)
086: throws XNIException {
087: if (augs == null || !synthesized(augs)) {
088: super .endElement(element, augs);
089: }
090: } // endElement(QName,XMLAttributes,Augmentations)
091:
092: //
093: // Protected static methods
094: //
095:
096: /** Returns true if the information provided is synthesized. */
097: protected static boolean synthesized(Augmentations augs) {
098: HTMLEventInfo info = (HTMLEventInfo) augs
099: .getItem(AUGMENTATIONS);
100: return info != null ? info.isSynthesized() : false;
101: } // synthesized(Augmentations):boolean
102:
103: } // class Identity
|