001: /*
002: * Copyright (c) JForum Team
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms,
006: * with or without modification, are permitted provided
007: * that the following conditions are met:
008: *
009: * 1) Redistributions of source code must retain the above
010: * copyright notice, this list of conditions and the
011: * following disclaimer.
012: * 2) Redistributions in binary form must reproduce the
013: * above copyright notice, this list of conditions and
014: * the following disclaimer in the documentation and/or
015: * other materials provided with the distribution.
016: * 3) Neither the name of "Rafael Steil" nor
017: * the names of its contributors may be used to endorse
018: * or promote products derived from this software without
019: * specific prior written permission.
020: *
021: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
022: * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
023: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
024: * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
025: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR
026: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
027: * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
028: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
029: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES
030: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
031: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
032: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
033: * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
034: * IN CONTRACT, STRICT LIABILITY, OR TORT
035: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
036: * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
037: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
038: *
039: * This file creation date: 27/09/2004 23:59:10
040: * The JForum Project
041: * http://www.jforum.net
042: */
043: package net.jforum.util;
044:
045: import java.util.HashSet;
046: import java.util.Iterator;
047: import java.util.Set;
048: import java.util.Vector;
049:
050: import net.jforum.exceptions.ForumException;
051: import net.jforum.util.preferences.ConfigKeys;
052: import net.jforum.util.preferences.SystemGlobals;
053: import net.jforum.view.forum.common.ViewCommon;
054:
055: import org.htmlparser.Attribute;
056: import org.htmlparser.Node;
057: import org.htmlparser.Tag;
058: import org.htmlparser.lexer.Lexer;
059: import org.htmlparser.nodes.TextNode;
060:
061: /**
062: * Process text with html and remove possible malicious tags and attributes.
063: * Work based on tips from Amit Klein and the following documents:
064: * <br>
065: * <li>http://ha.ckers.org/xss.html
066: * <li>http://quickwired.com/kallahar/smallprojects/php_xss_filter_function.php
067: * <br>
068: * @author Rafael Steil
069: * @version $Id: SafeHtml.java,v 1.25 2007/09/19 14:08:57 rafaelsteil Exp $
070: */
071: public class SafeHtml {
072: private static Set welcomeTags;
073: private static Set welcomeAttributes;
074: private static Set allowedProtocols;
075:
076: static {
077: welcomeTags = new HashSet();
078: welcomeAttributes = new HashSet();
079: allowedProtocols = new HashSet();
080:
081: splitAndTrim(ConfigKeys.HTML_TAGS_WELCOME, welcomeTags);
082: splitAndTrim(ConfigKeys.HTML_ATTRIBUTES_WELCOME,
083: welcomeAttributes);
084: splitAndTrim(ConfigKeys.HTML_LINKS_ALLOW_PROTOCOLS,
085: allowedProtocols);
086: }
087:
088: private static void splitAndTrim(String s, Set data) {
089: String s1 = SystemGlobals.getValue(s);
090:
091: if (s1 == null) {
092: return;
093: }
094:
095: String[] tags = s1.toUpperCase().split(",");
096:
097: for (int i = 0; i < tags.length; i++) {
098: data.add(tags[i].trim());
099: }
100: }
101:
102: /**
103: * Given an input, analyze each HTML tag and remove unsecure attributes from them.
104: * @param contents The content to verify
105: * @return the content, secure.
106: */
107: public String ensureAllAttributesAreSafe(String contents) {
108: StringBuffer sb = new StringBuffer(contents.length());
109:
110: try {
111: Lexer lexer = new Lexer(contents);
112: Node node;
113:
114: while ((node = lexer.nextNode()) != null) {
115: if (node instanceof Tag) {
116: Tag tag = (Tag) node;
117:
118: this .checkAndValidateAttributes(tag, false);
119:
120: sb.append(tag.toHtml());
121: } else {
122: sb.append(node.toHtml());
123: }
124: }
125: } catch (Exception e) {
126: throw new ForumException("Problems while parsing HTML: "
127: + e, e);
128: }
129:
130: return sb.toString();
131: }
132:
133: /**
134: * Given an input, makes it safe for HTML displaying.
135: * Removes any not allowed HTML tag or attribute, as well
136: * unwanted Javascript statements inside the tags.
137: * @param contents the input to analyze
138: * @return the modified and safe string
139: */
140: public String makeSafe(String contents) {
141: if (contents == null || contents.length() == 0) {
142: return contents;
143: }
144:
145: StringBuffer sb = new StringBuffer(contents.length());
146:
147: try {
148: Lexer lexer = new Lexer(contents);
149: Node node;
150:
151: while ((node = lexer.nextNode()) != null) {
152: boolean isTextNode = node instanceof TextNode;
153:
154: if (isTextNode) {
155: // Text nodes are raw data, so we just
156: // strip off all possible html content
157: String text = node.toHtml();
158:
159: if (text.indexOf('>') > -1
160: || text.indexOf('<') > -1) {
161: StringBuffer tmp = new StringBuffer(text);
162:
163: ViewCommon.replaceAll(tmp, "<", "<");
164: ViewCommon.replaceAll(tmp, ">", ">");
165: ViewCommon.replaceAll(tmp, "\"", """);
166:
167: node.setText(tmp.toString());
168: }
169: }
170:
171: if (isTextNode
172: || (node instanceof Tag && this
173: .isTagWelcome(node))) {
174: sb.append(node.toHtml());
175: } else {
176: StringBuffer tmp = new StringBuffer(node.toHtml());
177:
178: ViewCommon.replaceAll(tmp, "<", "<");
179: ViewCommon.replaceAll(tmp, ">", ">");
180:
181: sb.append(tmp.toString());
182: }
183: }
184: } catch (Exception e) {
185: throw new ForumException("Error while parsing HTML: " + e,
186: e);
187: }
188:
189: return sb.toString();
190: }
191:
192: /**
193: * Returns true if a given tag is allowed.
194: * Also, it checks and removes any unwanted attribute the tag may contain.
195: * @param node The tag node to analyze
196: * @return true if it is a valid tag.
197: */
198: private boolean isTagWelcome(Node node) {
199: Tag tag = (Tag) node;
200:
201: if (!welcomeTags.contains(tag.getTagName())) {
202: return false;
203: }
204:
205: this .checkAndValidateAttributes(tag, true);
206:
207: return true;
208: }
209:
210: /**
211: * Given a tag, check its attributes, removing those unwanted or not secure
212: * @param tag The tag to analyze
213: * @param checkIfAttributeIsWelcome true if the attribute name should be matched
214: * against the list of welcome attributes, set in the main configuration file.
215: */
216: private void checkAndValidateAttributes(Tag tag,
217: boolean checkIfAttributeIsWelcome) {
218: Vector newAttributes = new Vector();
219:
220: for (Iterator iter = tag.getAttributesEx().iterator(); iter
221: .hasNext();) {
222: Attribute a = (Attribute) iter.next();
223:
224: String name = a.getName();
225:
226: if (name == null) {
227: newAttributes.add(a);
228: } else {
229: name = name.toUpperCase();
230:
231: if (a.getValue() == null) {
232: newAttributes.add(a);
233: continue;
234: }
235:
236: String value = a.getValue().toLowerCase();
237:
238: if (checkIfAttributeIsWelcome
239: && !this .isAttributeWelcome(name)) {
240: continue;
241: }
242:
243: if (!this .isAttributeSafe(name, value)) {
244: continue;
245: }
246:
247: if (a.getValue().indexOf("&#") > -1) {
248: a.setValue(a.getValue().replaceAll("&#", "&#"));
249: }
250:
251: newAttributes.add(a);
252: }
253: }
254:
255: tag.setAttributesEx(newAttributes);
256: }
257:
258: /**
259: * Check if the given attribute name is in the list of allowed attributes
260: * @param name the attribute name
261: * @return true if it is an allowed attribute name
262: */
263: private boolean isAttributeWelcome(String name) {
264: return welcomeAttributes.contains(name);
265: }
266:
267: /**
268: * Check if the attribute is safe, checking either its name and value.
269: * @param name the attribute name
270: * @param value the attribute value
271: * @return true if it is a safe attribute
272: */
273: private boolean isAttributeSafe(String name, String value) {
274: if (name.length() >= 2 && name.charAt(0) == 'O'
275: && name.charAt(1) == 'N') {
276: return false;
277: }
278:
279: if (value.indexOf('\n') > -1 || value.indexOf('\r') > -1
280: || value.indexOf('\0') > -1) {
281: return false;
282: }
283:
284: if (("HREF".equals(name) || "SRC".equals(name))) {
285: if (!this .isHrefValid(value)) {
286: return false;
287: }
288: } else if ("STYLE".equals(name)) {
289: // It is much more a try to not allow constructions
290: // like style="background-color: url(javascript:xxxx)" than anything else
291: if (value.indexOf('(') > -1) {
292: return false;
293: }
294: }
295:
296: return true;
297: }
298:
299: /**
300: * Checks if a given address is valid
301: * @param href The address to check
302: * @return true if it is valid
303: */
304: private boolean isHrefValid(String href) {
305: if (SystemGlobals
306: .getBoolValue(ConfigKeys.HTML_LINKS_ALLOW_RELATIVE)
307: && href.length() > 0 && href.charAt(0) == '/') {
308: return true;
309: }
310:
311: for (Iterator iter = allowedProtocols.iterator(); iter
312: .hasNext();) {
313: String protocol = iter.next().toString().toLowerCase();
314:
315: if (href.startsWith(protocol)) {
316: return true;
317: }
318: }
319:
320: return false;
321: }
322: }
|