001: /**
002: * Copyright (c) 2003-2007, David A. Czarnecki
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * Redistributions of source code must retain the above copyright notice, this list of conditions and the
009: * following disclaimer.
010: * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
011: * following disclaimer in the documentation and/or other materials provided with the distribution.
012: * Neither the name of "David A. Czarnecki" and "blojsom" nor the names of its contributors may be used to
013: * endorse or promote products derived from this software without specific prior written permission.
014: * Products derived from this software may not be called "blojsom", nor may "blojsom" appear in their name,
015: * without prior written permission of David A. Czarnecki.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
018: * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
019: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
020: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
021: * EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
022: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
024: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
025: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026: * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
027: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
028: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
029: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: */package org.blojsom.plugin.filter;
031:
032: import org.apache.commons.logging.Log;
033: import org.apache.commons.logging.LogFactory;
034: import org.blojsom.blog.Blog;
035: import org.blojsom.blog.Entry;
036: import org.blojsom.event.Event;
037: import org.blojsom.event.EventBroadcaster;
038: import org.blojsom.event.Listener;
039: import org.blojsom.plugin.Plugin;
040: import org.blojsom.plugin.PluginException;
041: import org.blojsom.plugin.admin.event.ProcessEntryEvent;
042: import org.blojsom.plugin.comment.event.CommentResponseSubmissionEvent;
043: import org.blojsom.util.BlojsomUtils;
044:
045: import javax.servlet.http.HttpServletRequest;
046: import javax.servlet.http.HttpServletResponse;
047: import java.util.Map;
048: import java.util.regex.Matcher;
049: import java.util.regex.Pattern;
050:
051: /**
052: * XSSFilterPlugin
053: *
054: * @author David Czarnecki
055: * @version $Id: XSSFilterPlugin.java,v 1.5 2007/01/17 02:35:10 czarneckid Exp $
056: * @since blojsom 3.0
057: */
058: public class XSSFilterPlugin implements Plugin, Listener {
059:
060: private Log _logger = LogFactory.getLog(XSSFilterPlugin.class);
061:
062: // Default set of balanced and unbalanced tags
063: private static final String[] DEFAULT_ALLOWED_BALANCED_TAGS = {
064: "b", "strong", "i", "em", "u", "s", "blockquote", "pre",
065: "ul", "li", "ol" };
066: private static final String[] DEFAULT_ALLOWED_UNBALANCED_TAGS = {
067: "br", "img" };
068:
069: // Initialization parameters
070: private static final String XSS_FILTER_ALLOWED_BALANCED_TAGS_IP = "plugin-xss-filter-allowed-balanced-tags";
071: private static final String XSS_FILTER_ALLOWED_UNBALANCED_TAGS_IP = "plugin-xss-filter-allowed-unbalanced-tags";
072: private static final String XSS_FILTER_ALLOW_LINKS_IP = "plugin-xss-filter-allow-links";
073: private static final String XSS_FILTER_PROCESS_ENTRIES_IP = "plugin-xss-filter-process-entries";
074:
075: // Context variables
076: private static final String XSS_FILTER_ALLOWED_BALANCED_TAGS = "XSS_FILTER_ALLOWED_BALANCED_TAGS";
077: private static final String XSS_FILTER_ALLOWED_UNBALANCED_TAGS = "XSS_FILTER_ALLOWED_UNBALANCED_TAGS";
078: private static final String XSS_FILTER_ALLOW_LINKS = "XSS_FILTER_ALLOW_LINKS";
079:
080: private EventBroadcaster _eventBroadcaster;
081:
082: /**
083: * Set the {@link EventBroadcaster} to use
084: *
085: * @param eventBroadcaster {@link EventBroadcaster}
086: */
087: public void setEventBroadcaster(EventBroadcaster eventBroadcaster) {
088: _eventBroadcaster = eventBroadcaster;
089: }
090:
091: /**
092: * Initialize this plugin. This method only called when the plugin is instantiated.
093: *
094: * @throws org.blojsom.plugin.PluginException
095: * If there is an error initializing the plugin
096: */
097: public void init() throws PluginException {
098: _eventBroadcaster.addListener(this );
099: }
100:
101: /**
102: * Process the blog entries
103: *
104: * @param httpServletRequest Request
105: * @param httpServletResponse Response
106: * @param blog {@link Blog} instance
107: * @param context Context
108: * @param entries Blog entries retrieved for the particular request
109: * @return Modified set of blog entries
110: * @throws PluginException If there is an error processing the blog entries
111: */
112: public Entry[] process(HttpServletRequest httpServletRequest,
113: HttpServletResponse httpServletResponse, Blog blog,
114: Map context, Entry[] entries) throws PluginException {
115: // Get the individual blog's initialization parameters
116: String allowedBalancedTagsIP = blog
117: .getProperty(XSS_FILTER_ALLOWED_BALANCED_TAGS_IP);
118: String[] allowedBalancedTags = DEFAULT_ALLOWED_BALANCED_TAGS;
119: if (!BlojsomUtils.checkNullOrBlank(allowedBalancedTagsIP)) {
120: allowedBalancedTags = BlojsomUtils
121: .parseCommaList(allowedBalancedTagsIP);
122: }
123: context.put(XSS_FILTER_ALLOWED_BALANCED_TAGS,
124: allowedBalancedTags);
125:
126: String allowedUnbalancedTagsIP = blog
127: .getProperty(XSS_FILTER_ALLOWED_UNBALANCED_TAGS_IP);
128: String[] allowedUnbalancedTags = DEFAULT_ALLOWED_UNBALANCED_TAGS;
129: if (!BlojsomUtils.checkNullOrBlank(allowedUnbalancedTagsIP)) {
130: allowedUnbalancedTags = BlojsomUtils
131: .parseCommaList(allowedUnbalancedTagsIP);
132: }
133: context.put(XSS_FILTER_ALLOWED_UNBALANCED_TAGS,
134: allowedUnbalancedTags);
135:
136: String allowLinksIP = blog
137: .getProperty(XSS_FILTER_ALLOW_LINKS_IP);
138: Boolean allowLinks = Boolean.TRUE;
139: if (!BlojsomUtils.checkNullOrBlank(allowLinksIP)) {
140: allowLinks = Boolean.valueOf(allowLinksIP);
141: }
142: context.put(XSS_FILTER_ALLOW_LINKS, allowLinks);
143:
144: return entries;
145: }
146:
147: /**
148: * Perform any cleanup for the plugin. Called after {@link #process}.
149: *
150: * @throws org.blojsom.plugin.PluginException
151: * If there is an error performing cleanup for this plugin
152: */
153: public void cleanup() throws PluginException {
154: }
155:
156: /**
157: * Called when BlojsomServlet is taken out of service
158: *
159: * @throws org.blojsom.plugin.PluginException
160: * If there is an error in finalizing this plugin
161: */
162: public void destroy() throws PluginException {
163: }
164:
165: /**
166: * Handle an event broadcast from another component
167: *
168: * @param event {@link org.blojsom.event.Event} to be handled
169: */
170: public void handleEvent(Event event) {
171: }
172:
173: /**
174: * Process an event from another component
175: *
176: * @param event {@link org.blojsom.event.Event} to be handled
177: */
178: public void processEvent(Event event) {
179: if (event instanceof CommentResponseSubmissionEvent) {
180: if (_logger.isDebugEnabled()) {
181: _logger
182: .debug("Processing comment response submission event");
183: }
184:
185: CommentResponseSubmissionEvent commentEvent = (CommentResponseSubmissionEvent) event;
186:
187: String commentText = commentEvent.getContent();
188: commentText = processContent(commentText, commentEvent
189: .getBlog());
190:
191: // Save the processed comment text
192: commentEvent.setContent(commentText);
193: } else if (event instanceof ProcessEntryEvent) {
194: ProcessEntryEvent entryEvent = (ProcessEntryEvent) event;
195: Blog blog = entryEvent.getBlog();
196:
197: // Check to see if we should process entries through the XSS filter
198: if (Boolean.valueOf(
199: blog.getProperty(XSS_FILTER_PROCESS_ENTRIES_IP))
200: .booleanValue()) {
201: if (_logger.isDebugEnabled()) {
202: _logger
203: .debug("Processing process blog entry event");
204: }
205:
206: if (entryEvent.getEntry() != null) {
207: String entryText = entryEvent.getEntry()
208: .getDescription();
209: entryText = processContent(entryText, entryEvent
210: .getBlog());
211:
212: // Save the processed entry text
213: entryEvent.getEntry().setDescription(entryText);
214:
215: String entryTitle = entryEvent.getEntry()
216: .getTitle();
217: entryTitle = processContent(entryTitle, entryEvent
218: .getBlog());
219:
220: // Save the processed entry title
221: entryEvent.getEntry().setTitle(entryTitle);
222: }
223: }
224: }
225: }
226:
227: /**
228: * Internal method to process any string content through the various routines
229: *
230: * @param content Content
231: * @param blog {@link Blog} information}
232: * @return Processed content
233: */
234: protected String processContent(String content, Blog blog) {
235: // Get the individual blog's initialization parameters
236: String allowedBalancedTagsIP = blog
237: .getProperty(XSS_FILTER_ALLOWED_BALANCED_TAGS_IP);
238: String[] allowedBalancedTags = DEFAULT_ALLOWED_BALANCED_TAGS;
239: if (!BlojsomUtils.checkNullOrBlank(allowedBalancedTagsIP)) {
240: allowedBalancedTags = BlojsomUtils
241: .parseCommaList(allowedBalancedTagsIP);
242: }
243:
244: String allowedUnbalancedTagsIP = blog
245: .getProperty(XSS_FILTER_ALLOWED_UNBALANCED_TAGS_IP);
246: String[] allowedUnbalancedTags = DEFAULT_ALLOWED_UNBALANCED_TAGS;
247: if (!BlojsomUtils.checkNullOrBlank(allowedUnbalancedTagsIP)) {
248: allowedUnbalancedTags = BlojsomUtils
249: .parseCommaList(allowedUnbalancedTagsIP);
250: }
251:
252: String allowLinksIP = blog
253: .getProperty(XSS_FILTER_ALLOW_LINKS_IP);
254: boolean allowLinks = true;
255: if (!BlojsomUtils.checkNullOrBlank(allowLinksIP)) {
256: allowLinks = Boolean.valueOf(allowLinksIP).booleanValue();
257: }
258:
259: content = BlojsomUtils.escapeStringSimple(content);
260:
261: if (content != null) {
262: // Process balanced tags
263: for (int i = 0; i < allowedBalancedTags.length; i++) {
264: String allowedBalancedTag = allowedBalancedTags[i];
265:
266: content = replaceBalancedTag(content,
267: allowedBalancedTag);
268: }
269:
270: // Process unbalanced tags
271: for (int i = 0; i < allowedUnbalancedTags.length; i++) {
272: String allowedUnbalancedTag = allowedUnbalancedTags[i];
273:
274: content = replaceUnbalancedTag(content,
275: allowedUnbalancedTag);
276: }
277:
278: // Process links
279: if (allowLinks) {
280: content = processLinks(content);
281: }
282:
283: content = processImgTags(content);
284:
285: // Escaped brackets
286: content = content.replaceAll("&lt;", "<");
287: content = content.replaceAll("&gt;", ">");
288: content = content.replaceAll("&#", "&#");
289: }
290:
291: return content;
292: }
293:
294: /**
295: * Replace balanced tags
296: *
297: * @param input Input
298: * @param tag Tag
299: * @return String where the <<code>tag</code>> and <<code>/tag</code>> have been replaced appropriately
300: */
301: private String replaceBalancedTag(String input, String tag) {
302: Pattern openingPattern = Pattern.compile("<" + tag + ">",
303: Pattern.CASE_INSENSITIVE);
304: Pattern closingPattern = Pattern.compile(
305: "</" + tag + ">", Pattern.CASE_INSENSITIVE);
306:
307: Matcher openingMatcher = openingPattern.matcher(input);
308: input = openingMatcher.replaceAll("<" + tag + ">");
309:
310: Matcher closingMatcher = closingPattern.matcher(input);
311: input = closingMatcher.replaceAll("</" + tag + ">");
312:
313: return input;
314: }
315:
316: /**
317: * Replace unbalanced tags
318: *
319: * @param input Input
320: * @param tag Tag
321: * @return String where the <<code>tag /</code>> have been replaced appropriately
322: */
323: private String replaceUnbalancedTag(String input, String tag) {
324: Pattern unbalancedPattern = Pattern.compile("<" + tag
325: + "\\s*/*>", Pattern.CASE_INSENSITIVE);
326:
327: Matcher unbalancedMatcher = unbalancedPattern.matcher(input);
328: input = unbalancedMatcher.replaceAll("<" + tag + " />");
329:
330: return input;
331: }
332:
333: /**
334: * Process <a href .../> links
335: *
336: * @param input Input
337: * @return String where the <a href .../> links have been processed appropriately
338: */
339: private String processLinks(String input) {
340: Pattern openingLinkPattern = Pattern.compile(
341: "<a href=.*?>", Pattern.CASE_INSENSITIVE);
342: Pattern closingLinkPattern = Pattern.compile("</a>",
343: Pattern.CASE_INSENSITIVE);
344:
345: Matcher closingMatcher = closingLinkPattern.matcher(input);
346: input = closingMatcher.replaceAll("</a>");
347:
348: Matcher openingMatcher = openingLinkPattern.matcher(input);
349: while (openingMatcher.find()) {
350: int start = openingMatcher.start();
351: int end = openingMatcher.end();
352: String link = input.substring(start, end);
353: link = "<" + link.substring(4, link.length() - 4) + ">";
354: input = input.substring(0, start) + link
355: + input.substring(end, input.length());
356: openingMatcher = openingLinkPattern.matcher(input);
357: }
358:
359: return input;
360: }
361:
362: /**
363: * Process <img ... /> tags
364: *
365: * @param input Input
366: * @return String where the <img ... /> links have been processed appropriately
367: */
368: private String processImgTags(String input) {
369: Pattern imgPattern = Pattern.compile(
370: "(<)(\\s*img\\s?.*?\\s*/*)(>)",
371: Pattern.CASE_INSENSITIVE);
372: Matcher imgMatcher = imgPattern.matcher(input);
373:
374: // Replace all occurrences of pattern in input
375: StringBuffer buffer = new StringBuffer();
376: while (imgMatcher.find()) {
377: // Insert replacement
378: imgMatcher.appendReplacement(buffer, "<"
379: + imgMatcher.group(2) + ">");
380: }
381:
382: imgMatcher.appendTail(buffer);
383:
384: return buffer.toString();
385: }
386: }
|