001: /* Copyright 2006, 2007 The JA-SIG Collaborative. All rights reserved.
002: * See license distributed with this file and
003: * available online at http://www.uportal.org/license.html
004: */
005:
006: package org.jasig.portal.channels;
007:
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
020:
/**
 * SAX {@link ContentHandler} that copies an event stream into a DOM tree
 * while keeping only a white-listed set of elements, attributes and URL
 * schemes, so that only HTML considered safe from cross-site scripting
 * attacks reaches the document.
 *
 * <p>Behaviour worth knowing:</p>
 * <ul>
 * <li>Element and attribute names are matched case-insensitively and are
 *     written to the DOM in lower case.</li>
 * <li>The tags of an element that is not on the white list are dropped, but
 *     its character data and any white-listed descendants are still copied
 *     into the element that is current at that point.</li>
 * <li>URL-valued attributes are passed through
 *     {@link #sanitizeURL(String)}; a value whose scheme is not on the white
 *     list (including a value with no scheme at all) becomes the empty
 *     string.</li>
 * </ul>
 *
 * <p>An instance carries mutable parsing state and is meant to be used for a
 * single parse.</p>
 */
public class SaferHTMLHandler implements ContentHandler {

    /** Node that currently receives new children. */
    Node currentNode;

    /** Character data collected since the last text node was written. */
    StringBuffer chars = new StringBuffer();

    /** Factory for the elements and text nodes that are created. */
    Document doc;

    /*
     * See FeedParser for information on sanitizing HTML:
     * http://feedparser.org/docs/html-sanitization.html#advanced.sanitization.why
     */

    private static final String[] SAFE_ELEMENTS = { "a", "abbr",
            "acronym", "address", "area", "b", "big", "blockquote",
            "br", "button", "caption", "center", "cite", "code", "col",
            "colgroup", "dd", "del", "dfn", "dir", "div", "dl", "dt",
            "em", "fieldset", "font", "form", "h1", "h2", "h3", "h4",
            "h5", "h6", "hr", "i", "img", "input", "ins", "kbd",
            "label", "legend", "li", "map", "menu", "ol", "optgroup",
            "option", "p", "pre", "q", "s", "samp", "select", "small",
            "span", "strike", "strong", "sub", "sup", "table", "tbody",
            "td", "textarea", "tfoot", "th", "thead", "tr", "tt", "u",
            "ul", "var" };

    private static final String[] SAFE_ATTS = { "abbr", "accept",
            "accept-charset", "accesskey", "action", "align", "alt",
            "axis", "border", "cellpadding", "cellspacing", "char",
            "charoff", "charset", "checked", "cite", "class", "clear",
            "cols", "colspan", "color", "compact", "coords",
            "datetime", "dir", "disabled", "enctype", "for", "frame",
            "headers", "height", "href", "hreflang", "hspace", "id",
            "ismap", "label", "lang", "longdesc", "maxlength", "media",
            "method", "multiple", "name", "nohref", "noshade",
            "nowrap", "prompt", "readonly", "rel", "rev", "rows",
            "rowspan", "rules", "scope", "selected", "shape", "size",
            "span", "src", "start", "summary", "tabindex", "target",
            "title", "type", "usemap", "valign", "value", "vspace",
            "width" };

    /*
     * White-listed attributes whose value is a URL. Every one of them must
     * be scheme-checked: checking only "src" and "href" would let something
     * like <form action="javascript:..."> through untouched.
     */
    private static final String[] URL_ATTS = { "action", "cite", "href",
            "longdesc", "src" };

    private static final String[] SAFE_URL_SCHEMES = { "http", "https",
            "ftp", "mailto" };

    private static final Set<String> SAFE_ELEMENTS_SET = Collections
            .unmodifiableSet(new HashSet<String>(Arrays
                    .asList(SAFE_ELEMENTS)));

    private static final Set<String> SAFE_ATTS_SET = Collections
            .unmodifiableSet(new HashSet<String>(Arrays
                    .asList(SAFE_ATTS)));

    private static final Set<String> URL_ATTS_SET = Collections
            .unmodifiableSet(new HashSet<String>(Arrays
                    .asList(URL_ATTS)));

    private static final Set<String> SAFE_URL_SCHEMES_SET = Collections
            .unmodifiableSet(new HashSet<String>(Arrays
                    .asList(SAFE_URL_SCHEMES)));

    /**
     * Creates a handler that appends the sanitized content below
     * <code>root</code>.
     *
     * @param doc  document used to create the new elements and text nodes
     * @param root node that receives the sanitized content
     */
    public SaferHTMLHandler(Document doc, Node root) {
        this.doc = doc;
        currentNode = root;
    }

    /** Not used; location information is ignored. */
    public void setDocumentLocator(Locator locator) {
    }

    /** Nothing to prepare at the start of the document. */
    public void startDocument() throws SAXException {
    }

    /**
     * Writes any character data that is still buffered. Nothing is appended
     * when the buffer is empty.
     */
    public void endDocument() throws SAXException {
        flushText();
    }

    /** Not used; namespace prefix mappings are ignored. */
    public void startPrefixMapping(String prefix, String uri)
            throws SAXException {
    }

    /** Not used; namespace prefix mappings are ignored. */
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /**
     * Copies the element into the DOM if its name is white-listed, keeping
     * only white-listed attributes and blanking URL attributes whose scheme
     * is not allowed. Elements that are not white-listed are skipped without
     * changing the current node.
     *
     * @param uri       namespace URI (ignored)
     * @param localName local name (ignored)
     * @param qName     qualified name, matched case-insensitively
     * @param atts      attributes reported for the element
     */
    public void startElement(String uri, String localName,
            String qName, Attributes atts) throws SAXException {
        String elementName = normalizeName(qName);
        if (!SAFE_ELEMENTS_SET.contains(elementName)) {
            return;
        }

        // Text seen before this tag belongs in front of the new element.
        flushText();

        Element element = doc.createElement(elementName);
        int attCount = (atts == null) ? 0 : atts.getLength();
        for (int i = 0; i < attCount; i++) {
            String attrName = normalizeName(atts.getQName(i));
            String value = atts.getValue(i);

            // only copy safe attributes
            if (value == null || !SAFE_ATTS_SET.contains(attrName)) {
                continue;
            }
            if (URL_ATTS_SET.contains(attrName)) {
                value = sanitizeURL(value);
            }
            element.setAttribute(attrName, value);
        }

        currentNode.appendChild(element);
        currentNode = element;
    }

    /**
     * Closes a white-listed element: buffered text is written into it and
     * its parent becomes the current node again. End tags of elements that
     * are not white-listed are ignored, mirroring
     * {@link #startElement(String, String, String, Attributes)}.
     */
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if (!SAFE_ELEMENTS_SET.contains(normalizeName(qName))) {
            return;
        }
        flushText();
        Node parent = currentNode.getParentNode();
        // Never step off the top of the tree; a null current node would
        // only surface later as a NullPointerException.
        if (parent != null) {
            currentNode = parent;
        }
    }

    /** Buffers character data until the next element boundary. */
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        chars.append(ch, start, length);
    }

    /** Ignorable whitespace is not copied. */
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
    }

    /** Processing instructions are not copied. */
    public void processingInstruction(String target, String data)
            throws SAXException {
    }

    /** Skipped entities are not copied. */
    public void skippedEntity(String name) throws SAXException {
    }

    /**
     * Appends the buffered character data to the current node as one text
     * node and starts a fresh buffer. Does nothing when no text is pending.
     */
    private void flushText() {
        if (chars.length() > 0) {
            Node text = doc.createTextNode(chars.toString());
            chars = new StringBuffer();
            currentNode.appendChild(text);
        }
    }

    /**
     * Lower-cases an element or attribute name for white-list lookups.
     * A fixed locale is used so the result does not depend on the JVM's
     * default locale.
     *
     * @return the lower-cased name, or the empty string for <code>null</code>
     */
    private static String normalizeName(String name) {
        return (name == null) ? "" : name.toLowerCase(Locale.ENGLISH);
    }

    /**
     * Returns just the scheme portion of a URL, i.e. everything in front of
     * the first ':' of the trimmed URL, forced to lower case.
     *
     * @return the scheme, or the empty string when the URL is
     *         <code>null</code> or contains no ':'
     */
    private static String parseScheme(String url) {
        if (url == null) {
            return "";
        }
        String trimmed = url.trim();
        int colon = trimmed.indexOf(':');
        if (colon < 0) {
            return "";
        }
        // Fixed locale: with a Turkish default locale "MAILTO" would
        // otherwise lower-case to "ma\u0131lto" and miss the white list.
        return trimmed.substring(0, colon).toLowerCase(Locale.ENGLISH);
    }

    /**
     * Makes sure only safe URL schemes are allowed. This includes http,
     * https, ftp and mailto, and prevents dangerous javascript URLs and
     * other things we never even thought about.
     *
     * We could add more URL schemes if we determine they are needed and
     * safe.
     *
     * @param url the URL to check, may be <code>null</code>
     * @return <code>url</code> unaltered if its scheme is safe; the empty
     *         string if the scheme is unsafe or if there is no scheme at
     *         all (which includes <code>null</code> and relative URLs)
     */
    public static String sanitizeURL(String url) {
        String scheme = parseScheme(url);
        if (SAFE_URL_SCHEMES_SET.contains(scheme)) {
            return url;
        }
        return "";
    }
}
|