001: /* CVS ID: $Id: JavaScriptCleaner.java,v 1.1.1.1 2002/10/02 18:42:49 wastl Exp $ */
002: package net.wastl.webmail.misc;
003:
004: import org.w3c.dom.*;
005:
006: /*
007: * JavaScriptCleaner.java
008: *
009: * Created: Mon Jan 1 15:20:54 2001
010: *
011: * Copyright (C) 1999-2001 Sebastian Schaffert
012: *
013: * This program is free software; you can redistribute it and/or
014: * modify it under the terms of the GNU General Public License
015: * as published by the Free Software Foundation; either version 2
016: * of the License, or (at your option) any later version.
017: *
018: * This program is distributed in the hope that it will be useful,
019: * but WITHOUT ANY WARRANTY; without even the implied warranty of
020: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
021: * GNU General Public License for more details.
022: *
023: * You should have received a copy of the GNU General Public License
024: * along with this program; if not, write to the Free Software
025: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
026: */
027:
028: /**
029: * JavaScriptCleaner.java
030: *
031: * This class marks (hopefully) all potentially malicious content in HTML messages, such as
032: * <SCRIPT> tags, javascript: hrefs and onXXX event handlers (onMouseOver, ...), with a "malicious" attribute;
033: *
034: * Furthermore, we should consider removing all IMG tags as they might be used to call CGIs
035: *
036: * Created: Mon Jan 1 15:20:54 2001
037: *
038: * @author Sebastian Schaffert
039: * @version
040: */
041:
public class JavaScriptCleaner {

    /** Name of the marker attribute that is added to suspicious elements. */
    private static final String MALICIOUS_ATTRIBUTE = "malicious";

    /** Substring that makes an HREF value suspicious (matched case-insensitively). */
    private static final String JAVASCRIPT_HREF = "javascript";

    private static final String REASON_SCRIPT =
        "Marked malicious because of potential JavaScript abuse";
    private static final String REASON_IMAGE =
        "Marked malicious because of potential Image/CGI abuse";
    private static final String REASON_HREF =
        "Marked malicious because of potential JavaScript abuse (HREF attribute contains javascript code)";
    private static final String REASON_EVENT =
        "Marked malicious because of potential JavaScript abuse (element contains script events)";

    /** The document that was scanned by the constructor. */
    Document d;

    /**
     * Scans the given document and marks every suspicious element with a
     * "malicious" attribute whose value describes why it was marked.
     * The document is modified in place.
     *
     * @param d the document to scan; a document without a root element is
     *          accepted and left untouched
     */
    public JavaScriptCleaner(Document d) {
        this.d = d;
        // NOTE: walkTree is protected and therefore overridable; it is invoked
        // from the constructor for backward compatibility with existing callers.
        walkTree(d.getDocumentElement());
    }

    /**
     * Recursively inspects <code>node</code> and all of its descendants.
     *
     * Elements are marked with a "malicious" attribute (to be handled by the
     * XSLT stylesheets that display the message) when
     * <ul>
     * <li>the tag is SCRIPT, IMG or FORM,</li>
     * <li>an HREF attribute contains the text "javascript", or</li>
     * <li>an attribute name starts with "on" (script event handlers).</li>
     * </ul>
     * When several rules match, the message of the last matching rule is kept.
     * Nodes that are not elements (text, comments, ...) carry no attributes and
     * are only traversed.
     *
     * @param node the node to inspect; <code>null</code> is ignored
     */
    protected void walkTree(Node node) {
        if (node == null) {
            // Happens for a document without a root element.
            return;
        }

        if (node instanceof Element) {
            markIfMalicious((Element) node);
        }

        /* Do that recursively */
        if (node.hasChildNodes()) {
            NodeList children = node.getChildNodes();
            for (int i = 0; i < children.getLength(); i++) {
                walkTree(children.item(i));
            }
        }
    }

    /** Applies the tag rules and then the attribute rules to a single element. */
    private void markIfMalicious(Element element) {
        String reason = reasonForTag(element.getTagName());

        // Decide first, write afterwards: the NamedNodeMap is live, so adding the
        // marker attribute while iterating could skip or revisit attributes.
        NamedNodeMap attributes = element.getAttributes();
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                Attr attribute = (Attr) attributes.item(i);
                String attributeReason = reasonForAttribute(attribute);
                if (attributeReason != null) {
                    reason = attributeReason;
                }
            }
        }

        if (reason != null) {
            element.setAttribute(MALICIOUS_ATTRIBUTE, reason);
        }
    }

    /**
     * Returns the marker message for element types that should not be sent to
     * the user, or <code>null</code> if the tag itself is harmless.
     * equalsIgnoreCase is used because it does not depend on the default locale.
     */
    private static String reasonForTag(String tagName) {
        if (tagName == null) {
            return null;
        }
        String reason = null;
        if (tagName.equalsIgnoreCase("SCRIPT")) {
            reason = REASON_SCRIPT;
        }
        if (tagName.equalsIgnoreCase("IMG")) {
            reason = REASON_IMAGE;
        }
        /* What we also really don't like in HTML messages are FORMs! */
        if (tagName.equalsIgnoreCase("FORM")) {
            reason = REASON_SCRIPT;
        }
        return reason;
    }

    /**
     * Returns the marker message for a suspicious attribute, or
     * <code>null</code> if the attribute is harmless.
     */
    private static String reasonForAttribute(Attr attribute) {
        String name = attribute.getName();
        if (name == null) {
            return null;
        }
        if (name.equalsIgnoreCase("HREF")) {
            /* First case: look for hrefs containing "javascript" */
            String value = attribute.getValue();
            if (value != null && containsIgnoreCase(value, JAVASCRIPT_HREF)) {
                return REASON_HREF;
            }
            return null;
        }
        /* All elements containing "onXXX" attributes get the malicious attribute immediately */
        if (name.regionMatches(true, 0, "ON", 0, 2)) {
            return REASON_EVENT;
        }
        return null;
    }

    /**
     * Locale-independent, case-insensitive substring test. The bound is
     * inclusive so that a match at the very end of <code>text</code> is found.
     */
    private static boolean containsIgnoreCase(String text, String needle) {
        int lastStart = text.length() - needle.length();
        for (int start = 0; start <= lastStart; start++) {
            if (text.regionMatches(true, start, needle, 0, needle.length())) {
                return true;
            }
        }
        return false;
    }

} // JavaScriptCleaner
|