01: /*
02: * Copyright 2002-2008 Andy Clark
03: *
04: * Licensed under the Apache License, Version 2.0 (the "License");
05: * you may not use this file except in compliance with the License.
06: * You may obtain a copy of the License at
07: *
08: * http://www.apache.org/licenses/LICENSE-2.0
09: *
10: * Unless required by applicable law or agreed to in writing, software
11: * distributed under the License is distributed on an "AS IS" BASIS,
12: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13: * See the License for the specific language governing permissions and
14: * limitations under the License.
15: */
16:
17: package sample;
18:
19: import org.cyberneko.html.HTMLConfiguration;
20: import org.cyberneko.html.filters.ElementRemover;
21:
22: import org.apache.xerces.xni.parser.XMLDocumentFilter;
23: import org.apache.xerces.xni.parser.XMLInputSource;
24: import org.apache.xerces.xni.parser.XMLParserConfiguration;
25:
26: /**
27: * This is a sample that illustrates how to use the
28: * <code>ElementRemover</code> filter.
29: *
30: * @author Andy Clark
31: *
32: * @version $Id: RemoveElements.java,v 1.3 2004/02/19 20:00:17 andyc Exp $
33: */
34: public class RemoveElements {
35:
36: //
37: // MAIN
38: //
39:
40: /** Main. */
41: public static void main(String[] argv) throws Exception {
42:
43: // create element remover filter
44: ElementRemover remover = new ElementRemover();
45:
46: // set which elements to accept
47: remover.acceptElement("b", null);
48: remover.acceptElement("i", null);
49: remover.acceptElement("u", null);
50: remover.acceptElement("a", new String[] { "href" });
51:
52: // completely remove script elements
53: remover.removeElement("script");
54:
55: // create writer filter
56: org.cyberneko.html.filters.Writer writer = new org.cyberneko.html.filters.Writer();
57:
58: // setup filter chain
59: XMLDocumentFilter[] filters = { remover, writer, };
60:
61: // create HTML parser
62: XMLParserConfiguration parser = new HTMLConfiguration();
63: parser
64: .setProperty(
65: "http://cyberneko.org/html/properties/filters",
66: filters);
67:
68: // parse documents
69: for (int i = 0; i < argv.length; i++) {
70: String systemId = argv[i];
71: XMLInputSource source = new XMLInputSource(null, systemId,
72: null);
73: parser.parse(source);
74: }
75:
76: } // main(String[])
77:
78: } // class RemoveElements
|