001: /* ====================================================================
002: * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
003: *
004: * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions
008: * are met:
009: *
010: * 1. Redistributions of source code must retain the above copyright
011: * notice, this list of conditions and the following disclaimer.
012: *
013: * 2. Redistributions in binary form must reproduce the above copyright
014: * notice, this list of conditions and the following disclaimer in
015: * the documentation and/or other materials provided with the
016: * distribution.
017: *
018: * 3. The end-user documentation included with the redistribution,
019: * if any, must include the following acknowledgment:
020: * "This product includes software developed by Jcorporate Ltd.
021: * (http://www.jcorporate.com/)."
022: * Alternately, this acknowledgment may appear in the software itself,
023: * if and wherever such third-party acknowledgments normally appear.
024: *
025: * 4. "Jcorporate" and product names such as "Expresso" must
026: * not be used to endorse or promote products derived from this
027: * software without prior written permission. For written permission,
028: * please contact info@jcorporate.com.
029: *
030: * 5. Products derived from this software may not be called "Expresso",
031: * or other Jcorporate product names; nor may "Expresso" or other
032: * Jcorporate product names appear in their name, without prior
033: * written permission of Jcorporate Ltd.
034: *
035: * 6. No product derived from this software may compete in the same
036: * market space, i.e. framework, without prior written permission
037: * of Jcorporate Ltd. For written permission, please contact
038: * partners@jcorporate.com.
039: *
040: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
041: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
042: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
043: * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
044: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
045: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
046: * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
047: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
048: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
049: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
050: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
051: * SUCH DAMAGE.
052: * ====================================================================
053: *
054: * This software consists of voluntary contributions made by many
055: * individuals on behalf of the Jcorporate Ltd. Contributions back
056: * to the project(s) are encouraged when you make modifications.
057: * Please send them to support@jcorporate.com. For more information
058: * on Jcorporate Ltd. and its products, please see
059: * <http://www.jcorporate.com/>.
060: *
061: * Portions of this software are based upon other open source
062: * products and are subject to their respective licenses.
063: */
064:
065: package com.jcorporate.expresso.core.security.filters;
066:
067: import com.jcorporate.expresso.core.controller.ControllerRequest;
068: import com.jcorporate.expresso.core.controller.ServletControllerRequest;
069: import com.jcorporate.expresso.core.db.DBConnection;
070: import com.jcorporate.expresso.core.misc.StringUtil;
071: import com.jcorporate.expresso.services.dbobj.Setup;
072:
073: import javax.servlet.http.HttpServletRequest;
074:
075: /**
076: * This class provides a filter implementation for HTML output, protecting against
077: * XSS exploits, plus it creates anchor (<a>) tags for anything that starts
078: * with 'http://', 'www.', etc.
079: *
080: * @author Larry Hamel and Patti Schank
081: */
082: public class HtmlPlusURLFilter extends HtmlFilter {
083:
084: // URL types to search for (to add anchor tag)
085: public static final String[] URL_TYPES = { "http://", "https://",
086: "ftp://", "mailto:", "news:" };
087:
088: // These will be prepended with 'http://"
089: public static final String[] URL_INFORMAL_PREFIXES = { "www.",
090: "www2." };
091:
092: /**
093: * Name for Setup value which decides if we are limiting anchor labels
094: */
095: public static final String MAX_CHARS_IN_URL_LABEL = "MaxCharsURL_Label";
096:
097: /**
098: * No-args constructor required
099: */
100: public HtmlPlusURLFilter() throws IllegalArgumentException {
101: }
102:
103: /**
104: * Constructor for passing strings and their replacements
105: *
106: * @param specialStringList Strings to replace
107: * @param replaceList The replacement strings
108: */
109: public HtmlPlusURLFilter(String[] specialStringList,
110: String[] replaceList) throws IllegalArgumentException {
111: super (specialStringList, replaceList);
112: }
113:
114: /**
115: * This filter HTML encodes all special characters defined by the replacement
116: * list. If a particular character doesn't exist in the map, then the chracter
117: * is passed appended into the result set.
118: * If it does exist, then the value the special character maps to is appended
119: * into the list instead.
120: *
121: * @param data The string to encode.
122: * @return The filtered string
123: */
124: public String standardFilter(String data) {
125: String result = super .standardFilter(data);
126:
127: return insertHrefTags(result);
128: } /* standardFilter(String) */
129:
130: /**
131: * Given a url string, if it's null or equals "" then just return
132: * it as is. Otherwise check if it is valid form, that is, starts
133: * with http:// or ftp:// or some other valid url prefix. If not,
134: * prepend http://.
135: *
136: * @param url The url string
137: * @return The url with http:// prepended, if needed
138: */
139: public static String addHttpPrefixIfNeeded(String url) {
140: if (StringUtil.isBlankOrNull(url)) {
141: return url;
142: }
143: String validUrl = url;
144: if (!hasValidUrlPrefix(url)) {
145: validUrl = "http://" + url;
146: }
147: return validUrl;
148: }
149:
150: /**
151: * Return true if the url has a valid prefix, like http://
152: *
153: * @param url
154: * @return
155: */
156: public static boolean hasValidUrlPrefix(String url) {
157: boolean valid = false;
158: for (int i = 0; i < URL_TYPES.length; i++) {
159: if (url.startsWith(URL_TYPES[i])) {
160: valid = true;
161: }
162: }
163: return valid;
164: }
165:
166: /**
167: * Return true if the url is valid. Checks that it is not
168: * null, that it has a valid prefix, and that it contains
169: * a dot (must, to have a domain name) and at least 2 characters
170: * after the dot (the domain). Add more tests here as appropriate.
171: *
172: * @param url The candidate URL to check
173: * @return True if the URL is valid
174: */
175: public static boolean isValidUrl(String url) {
176: if (StringUtil.isBlankOrNull(url)) {
177: return false;
178: }
179: if (!hasValidUrlPrefix(url)) {
180: return false;
181: }
182: // does it have a domain name after a dot?
183: int dotIndex = url.indexOf(".");
184: if (dotIndex < 0) {
185: return false;
186: }
187: String domain = url.substring(dotIndex);
188: if (domain.length() < 2) {
189: return false;
190: }
191: return true;
192: }
193:
194: /**
195: * Get web server address
196: *
197: * @return the address of this web server
198: */
199: public static String getWebHostPort(ControllerRequest request) {
200: ServletControllerRequest sreq = (ServletControllerRequest) request;
201: HttpServletRequest hreq = (HttpServletRequest) sreq
202: .getServletRequest();
203: String serverDomainName = hreq.getServerName();
204: int serverPort = hreq.getServerPort();
205:
206: if (serverPort != 80) {
207: serverDomainName = serverDomainName + ":" + serverPort;
208: }
209: return serverDomainName;
210: }
211:
212: /**
213: * Insert a href tag around any http, https, www, or www2 strings
214: *
215: * @param s The string to search in and insert
216: * @return A String with <a href></a> tags and http:// if needed
217: */
218: public static String insertHrefTags(String s) {
219: boolean appendHttp = false;
220: String result = s;
221:
222: int hIndex = -1;
223: for (int i = 0; i < URL_TYPES.length; i++) {
224: String urlType = URL_TYPES[i];
225: hIndex = s.indexOf(urlType);
226: if (hIndex != -1) {
227: break;
228: }
229: }
230:
231: if (hIndex == -1) {
232: for (int i = 0; i < URL_INFORMAL_PREFIXES.length; i++) {
233: String urlType = URL_INFORMAL_PREFIXES[i];
234: hIndex = s.indexOf(urlType);
235: if (hIndex != -1) {
236: appendHttp = true;
237: break;
238: }
239:
240: hIndex = s.indexOf(urlType.toUpperCase());
241: if (hIndex != -1) {
242: appendHttp = true;
243: break;
244: }
245: }
246:
247: }
248:
249: if (hIndex >= 0) {
250: int endIndex = findEndOfHref(s, hIndex);
251: String href = s.substring(hIndex, endIndex);
252:
253: // XSS protection: cannot have '<', and we have already stripped for this
254: // in initial filtering. However, within anchor, cannot have equivalent
255: // hex or < within URL, so that <script> cannot be entered
256: // todo use String.replace() regexp in jdk1.4 after expresso raises requirements
257: href = StringUtil.replaceAll(href, "<", "");
258: href = StringUtil.replaceAll(href, "<", "");
259: href = StringUtil.replaceAll(href, "&lT;", "");
260: href = StringUtil.replaceAll(href, "≪", "");
261: href = StringUtil.replaceAll(href, "%3c", "");
262: href = StringUtil.replaceAll(href, "%3C", "");
263:
264: StringBuffer link = new StringBuffer();
265: link.append(" <a href=\"");
266: if (appendHttp) {
267: link.append("http://");
268: }
269: link.append(href);
270: link.append("\" target=\"_blank\">");
271:
272: // If string is long, use ellipses if this setup value is set
273: // Unfortunately, we don't have access to dbname here
274: String max = Setup.getValueUnrequired(
275: DBConnection.DEFAULT_DB_CONTEXT_NAME,
276: MAX_CHARS_IN_URL_LABEL);
277: if (max != null) {
278: try {
279: int maxchars = Integer.parseInt(max);
280: if (href.length() > maxchars) {
281: link.append(href.substring(0, maxchars));
282: link.append("…"); // ellipses
283: } else {
284: link.append(href);
285: }
286: } catch (Exception e) {
287: e.printStackTrace();
288: }
289: } else {
290: link.append(href);
291: }
292: link.append("</a>");
293:
294: String linksBefore = "";
295: String linksAfter = "";
296:
297: // recurse
298: if (hIndex > 5) {
299: linksBefore = insertHrefTags(s.substring(0, hIndex));
300: }
301: if (endIndex != s.length()) {
302: linksAfter = insertHrefTags(s.substring(endIndex));
303: }
304:
305: return linksBefore + link.toString() + linksAfter;
306:
307: } else {
308: return result;
309: }
310: }
311:
312: /**
313: * Finds the end of a hyperlink
314: *
315: * @param s The string
316: * @param start The url's starting index
317: */
318: public static int findEndOfHref(String s, int start) {
319: char[] chars = s.toCharArray();
320: int end = s.length();
321:
322: for (int i = start; i < end; i++) {
323: char c = chars[i];
324:
325: if (Character.isLetterOrDigit(c)) {
326: continue;
327: }
328:
329: /**
330: * Legal punctuation in URLs (see RFC 2396
331: * ftp://ftp.isi.edu/in-notes/rfc2396.txt)
332: ; / ? : @ & = +
333: $ , - _" . ! ~ * ' ( )
334: % #
335: */
336: switch (c) { // legal punctuation in URLS
337: case '.':
338: case ',':
339: case ')':
340: case '(':
341: case '@':
342: case '?':
343: case '&':
344: case '=':
345: case '-':
346: case '_':
347: case '/':
348: case '#':
349: case ':':
350: case '~':
351: case '+':
352: case ';':
353: case '!':
354: case '*':
355: case '\'':
356: case '$':
357: continue; // legal punctuation in URL
358:
359: case '%':
360: // Special case, indicates a URL encoding follows
361: // Malicious XSS could abuse encoding to slip scripts
362: // through. Only allow encoding of safe hex characters
363: if ((i < (end - 2))
364: && (isSafeURLEncoding(chars[i + 1],
365: chars[i + 2]))) {
366: continue;
367: } else {
368: return i; // unsafe URL encoding
369: }
370:
371: default:
372: return i; // illegal punctuation--must be end of URL
373: }
374: }
375: return end;
376: }
377:
378: /**
379: * Return true if we allow the given URL encoding (after a %).
380: * See http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
381: * To stop XSS attacks, definitely don't allow:
382: * the less than symbol < (3C) and the greater than symbol > (3E)
383: * 00-1F hex (ascii control characters)
384: * 80-FF hex (non-ascii, by definition not legal)
385: * <p/>
386: * For extra safety, let's not allow the following (add later if needed)
387: * quote (%27), left paren (%28), right paren (%29)
388: * left bracket (7B), right bracket (7D)
389: * <p/>
390: * Okay to allow as encoded (might be misunderstood within URLS):s
391: * space (20), ! (21), " (22), # (23), $ (24)
392: * % (25), & (26), * (2A), + (2B), comma (2C)
393: * - (2D), period (2E), / (2F), : (3A), ; (3B),
394: * = (3D), ? (3F), @ (40)
395: * | (7C), \ (5C), ~ (7E)
396: * <p/>
397: * The following are also okay, but shouldn't be encoded anyway, so don't
398: * bother checking for these for now:
399: * digits: 30-39
400: * uppercase letters: 41 - 5A
401: * lowercase letters: 61 - 7A
402: */
403: private static boolean isSafeURLEncoding(char c1, char c2) {
404: String[] allowedEncodings = { "20", "21", "22", "23", "24",
405: "25", "26", "2A", "2B", "2C", "2D", "2E", "2F", "3A",
406: "3B", "3D", "3F", "40", "7C", "5C", "7E" };
407:
408: String encodedCharStr = String.valueOf(c1) + String.valueOf(c2);
409:
410: // test for containment of safe encoding characters
411: for (int i = 0; i < allowedEncodings.length; i++) {
412: if (encodedCharStr.startsWith(allowedEncodings[i])) {
413: return true;
414: }
415: }
416: // otherwise assume encoded characters are unsafe
417: return false;
418: }
419: }
|