Source Code Cross Referenced for ElementRemover.java in  » HTML-Parser » nekohtml » org » cyberneko » html » filters » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » HTML Parser » nekohtml » org.cyberneko.html.filters 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /* 
002:         * Copyright 2002-2008 Andy Clark
003:         * 
004:         * Licensed under the Apache License, Version 2.0 (the "License");
005:         * you may not use this file except in compliance with the License.
006:         * You may obtain a copy of the License at
007:         *
008:         *     http://www.apache.org/licenses/LICENSE-2.0
009:         *
010:         * Unless required by applicable law or agreed to in writing, software
011:         * distributed under the License is distributed on an "AS IS" BASIS,
012:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013:         * See the License for the specific language governing permissions and
014:         * limitations under the License.
015:         */
016:
017:        package org.cyberneko.html.filters;
018:
019:        import java.util.Hashtable;
020:
021:        import org.apache.xerces.xni.Augmentations;
022:        import org.apache.xerces.xni.NamespaceContext;
023:        import org.apache.xerces.xni.QName;
024:        import org.apache.xerces.xni.XMLAttributes;
025:        import org.apache.xerces.xni.XMLLocator;
026:        import org.apache.xerces.xni.XMLResourceIdentifier;
027:        import org.apache.xerces.xni.XMLString;
028:        import org.apache.xerces.xni.XNIException;
029:
030:        /**
031:         * This class is a document filter capable of removing specified
032:         * elements from the processing stream. There are two options for
033:         * processing document elements:
034:         * <ul>
035:         * <li>specifying those elements which should be accepted and,
036:         *     optionally, which attributes of that element should be
037:         *     kept; and
038:         * <li>specifying those elements whose tags and content should be
039:         *     completely removed from the event stream.
040:         * </ul>
041:         * <p>
042:         * The first option allows the application to specify which elements
043:         * appearing in the event stream should be accepted and, therefore,
044:         * passed on to the next stage in the pipeline. All elements 
045:         * <em>not</em> in the list of acceptable elements have their start 
046:         * and end tags stripped from the event stream <em>unless</em> those
047:         * elements appear in the list of elements to be removed. 
048:         * <p>
049:         * The second option allows the application to specify which elements
050:         * should be completely removed from the event stream. When an element
051:         * appears that is to be removed, the element's start and end tag as
052:         * well as all of that element's content is removed from the event
053:         * stream.
054:         * <p>
055:         * A common use of this filter would be to only allow rich-text
056:         * and linking elements as well as the character content to pass 
057:         * through the filter &mdash; all other elements would be stripped.
058:         * The following code shows how to configure this filter to perform
059:         * this task:
060:         * <pre>
061:         *  ElementRemover remover = new ElementRemover();
062:         *  remover.acceptElement("b", null);
063:         *  remover.acceptElement("i", null);
064:         *  remover.acceptElement("u", null);
065:         *  remover.acceptElement("a", new String[] { "href" });
066:         * </pre>
067:         * <p>
068:         * However, this would still allow the text content of other
069:         * elements to pass through, which may not be desirable. In order
070:         * to further "clean" the input, the <code>removeElement</code>
071:         * option can be used. The following piece of code adds the ability
072:         * to completely remove any &lt;SCRIPT&gt; tags and content 
073:         * from the stream.
074:         * <pre>
075:         *  remover.removeElement("script");
076:         * </pre>
077:         * <p>
078:         * <strong>Note:</strong> 
079:         * All text and accepted element children of a stripped element are 
080:         * retained. To completely remove an element's content, use the
081:         * <code>removeElement</code> method.
082:         * <p>
083:         * <strong>Note:</strong>
084:         * Care should be taken when using this filter because the output
085:         * may not be a well-balanced tree. Specifically, if the application
086:         * removes the &lt;HTML&gt; element (with or without retaining its
087:         * children), the resulting document event stream will no longer be
088:         * well-formed.
089:         *
090:         * @author Andy Clark
091:         *
092:         * @version $Id: ElementRemover.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
093:         */
094:        public class ElementRemover extends DefaultFilter {
095:
096:            //
097:            // Constants
098:            //
099:
100:            /** A "null" object. */
101:            protected static final Object NULL = new Object();
102:
103:            //
104:            // Data
105:            //
106:
107:            // information
108:
109:            /** Accepted elements. */
110:            protected Hashtable fAcceptedElements = new Hashtable();
111:
112:            /** Removed elements. */
113:            protected Hashtable fRemovedElements = new Hashtable();
114:
115:            // state
116:
117:            /** The element depth. */
118:            protected int fElementDepth;
119:
120:            /** The element depth at element removal. */
121:            protected int fRemovalElementDepth;
122:
123:            //
124:            // Public methods
125:            //
126:
127:            /** 
128:             * Specifies that the given element should be accepted and, optionally,
129:             * which attributes of that element should be kept.
130:             *
131:             * @param element The element to accept.
132:             * @param attributes The list of attributes to be kept or null if no
133:             *                   attributes should be kept for this element.
134:             *
135:             * see #removeElement
136:             */
137:            public void acceptElement(String element, String[] attributes) {
138:                Object key = element.toLowerCase();
139:                Object value = NULL;
140:                if (attributes != null) {
141:                    String[] newarray = new String[attributes.length];
142:                    for (int i = 0; i < attributes.length; i++) {
143:                        newarray[i] = attributes[i].toLowerCase();
144:                    }
145:                    value = attributes;
146:                }
147:                fAcceptedElements.put(key, value);
148:            } // acceptElement(String,String[])
149:
150:            /** 
151:             * Specifies that the given element should be completely removed. If an
152:             * element is encountered during processing that is on the remove list, 
153:             * the element's start and end tags as well as all of content contained
154:             * within the element will be removed from the processing stream.
155:             *
156:             * @param element The element to completely remove.
157:             */
158:            public void removeElement(String element) {
159:                Object key = element.toLowerCase();
160:                Object value = NULL;
161:                fRemovedElements.put(key, value);
162:            } // removeElement(String)
163:
164:            //
165:            // XMLDocumentHandler methods
166:            //
167:
168:            // since Xerces-J 2.2.0
169:
170:            /** Start document. */
171:            public void startDocument(XMLLocator locator, String encoding,
172:                    NamespaceContext nscontext, Augmentations augs)
173:                    throws XNIException {
174:                fElementDepth = 0;
175:                fRemovalElementDepth = Integer.MAX_VALUE;
176:                super .startDocument(locator, encoding, nscontext, augs);
177:            } // startDocument(XMLLocator,String,NamespaceContext,Augmentations)
178:
179:            // old methods
180:
181:            /** Start document. */
182:            public void startDocument(XMLLocator locator, String encoding,
183:                    Augmentations augs) throws XNIException {
184:                startDocument(locator, encoding, null, augs);
185:            } // startDocument(XMLLocator,String,Augmentations)
186:
187:            /** Start prefix mapping. */
188:            public void startPrefixMapping(String prefix, String uri,
189:                    Augmentations augs) throws XNIException {
190:                if (fElementDepth <= fRemovalElementDepth) {
191:                    super .startPrefixMapping(prefix, uri, augs);
192:                }
193:            } // startPrefixMapping(String,String,Augmentations)
194:
195:            /** Start element. */
196:            public void startElement(QName element, XMLAttributes attributes,
197:                    Augmentations augs) throws XNIException {
198:                if (fElementDepth <= fRemovalElementDepth
199:                        && handleOpenTag(element, attributes)) {
200:                    super .startElement(element, attributes, augs);
201:                }
202:                fElementDepth++;
203:            } // startElement(QName,XMLAttributes,Augmentations)
204:
205:            /** Empty element. */
206:            public void emptyElement(QName element, XMLAttributes attributes,
207:                    Augmentations augs) throws XNIException {
208:                if (fElementDepth <= fRemovalElementDepth
209:                        && handleOpenTag(element, attributes)) {
210:                    super .emptyElement(element, attributes, augs);
211:                }
212:            } // emptyElement(QName,XMLAttributes,Augmentations)
213:
214:            /** Comment. */
215:            public void comment(XMLString text, Augmentations augs)
216:                    throws XNIException {
217:                if (fElementDepth <= fRemovalElementDepth) {
218:                    super .comment(text, augs);
219:                }
220:            } // comment(XMLString,Augmentations)
221:
222:            /** Processing instruction. */
223:            public void processingInstruction(String target, XMLString data,
224:                    Augmentations augs) throws XNIException {
225:                if (fElementDepth <= fRemovalElementDepth) {
226:                    super .processingInstruction(target, data, augs);
227:                }
228:            } // processingInstruction(String,XMLString,Augmentations)
229:
230:            /** Characters. */
231:            public void characters(XMLString text, Augmentations augs)
232:                    throws XNIException {
233:                if (fElementDepth <= fRemovalElementDepth) {
234:                    super .characters(text, augs);
235:                }
236:            } // characters(XMLString,Augmentations)
237:
238:            /** Ignorable whitespace. */
239:            public void ignorableWhitespace(XMLString text, Augmentations augs)
240:                    throws XNIException {
241:                if (fElementDepth <= fRemovalElementDepth) {
242:                    super .ignorableWhitespace(text, augs);
243:                }
244:            } // ignorableWhitespace(XMLString,Augmentations)
245:
246:            /** Start general entity. */
247:            public void startGeneralEntity(String name,
248:                    XMLResourceIdentifier id, String encoding,
249:                    Augmentations augs) throws XNIException {
250:                if (fElementDepth <= fRemovalElementDepth) {
251:                    super .startGeneralEntity(name, id, encoding, augs);
252:                }
253:            } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
254:
255:            /** Text declaration. */
256:            public void textDecl(String version, String encoding,
257:                    Augmentations augs) throws XNIException {
258:                if (fElementDepth <= fRemovalElementDepth) {
259:                    super .textDecl(version, encoding, augs);
260:                }
261:            } // textDecl(String,String,Augmentations)
262:
263:            /** End general entity. */
264:            public void endGeneralEntity(String name, Augmentations augs)
265:                    throws XNIException {
266:                if (fElementDepth <= fRemovalElementDepth) {
267:                    super .endGeneralEntity(name, augs);
268:                }
269:            } // endGeneralEntity(String,Augmentations)
270:
271:            /** Start CDATA section. */
272:            public void startCDATA(Augmentations augs) throws XNIException {
273:                if (fElementDepth <= fRemovalElementDepth) {
274:                    super .startCDATA(augs);
275:                }
276:            } // startCDATA(Augmentations)
277:
278:            /** End CDATA section. */
279:            public void endCDATA(Augmentations augs) throws XNIException {
280:                if (fElementDepth <= fRemovalElementDepth) {
281:                    super .endCDATA(augs);
282:                }
283:            } // endCDATA(Augmentations)
284:
285:            /** End element. */
286:            public void endElement(QName element, Augmentations augs)
287:                    throws XNIException {
288:                if (fElementDepth <= fRemovalElementDepth
289:                        && elementAccepted(element.rawname)) {
290:                    super .endElement(element, augs);
291:                }
292:                fElementDepth--;
293:                if (fElementDepth == fRemovalElementDepth) {
294:                    fRemovalElementDepth = Integer.MAX_VALUE;
295:                }
296:            } // endElement(QName,Augmentations)
297:
298:            /** End prefix mapping. */
299:            public void endPrefixMapping(String prefix, Augmentations augs)
300:                    throws XNIException {
301:                if (fElementDepth <= fRemovalElementDepth) {
302:                    super .endPrefixMapping(prefix, augs);
303:                }
304:            } // endPrefixMapping(String,Augmentations)
305:
306:            //
307:            // Protected methods
308:            //
309:
310:            /** Returns true if the specified element is accepted. */
311:            protected boolean elementAccepted(String element) {
312:                Object key = element.toLowerCase();
313:                return fAcceptedElements.containsKey(key);
314:            } // elementAccepted(String):boolean
315:
316:            /** Returns true if the specified element should be removed. */
317:            protected boolean elementRemoved(String element) {
318:                Object key = element.toLowerCase();
319:                return fRemovedElements.containsKey(key);
320:            } // elementRemoved(String):boolean
321:
322:            /** Handles an open tag. */
323:            protected boolean handleOpenTag(QName element,
324:                    XMLAttributes attributes) {
325:                if (elementAccepted(element.rawname)) {
326:                    Object key = element.rawname.toLowerCase();
327:                    Object value = fAcceptedElements.get(key);
328:                    if (value != NULL) {
329:                        String[] anames = (String[]) value;
330:                        int attributeCount = attributes.getLength();
331:                        LOOP: for (int i = 0; i < attributeCount; i++) {
332:                            String aname = attributes.getQName(i).toLowerCase();
333:                            for (int j = 0; j < anames.length; j++) {
334:                                if (anames[j].equals(aname)) {
335:                                    continue LOOP;
336:                                }
337:                            }
338:                            attributes.removeAttributeAt(i--);
339:                            attributeCount--;
340:                        }
341:                    } else {
342:                        attributes.removeAllAttributes();
343:                    }
344:                    return true;
345:                } else if (elementRemoved(element.rawname)) {
346:                    fRemovalElementDepth = fElementDepth;
347:                }
348:                return false;
349:            } // handleOpenTag(QName,XMLAttributes):boolean
350:
351:        } // class DefaultFilter
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.