001: /**
002: * Copyright (c) 2003-2007, David A. Czarnecki
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * Redistributions of source code must retain the above copyright notice, this list of conditions and the
009: * following disclaimer.
010: * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
011: * following disclaimer in the documentation and/or other materials provided with the distribution.
012: * Neither the name of "David A. Czarnecki" and "blojsom" nor the names of its contributors may be used to
013: * endorse or promote products derived from this software without specific prior written permission.
014: * Products derived from this software may not be called "blojsom", nor may "blojsom" appear in their name,
015: * without prior written permission of David A. Czarnecki.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
018: * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
019: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
020: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
021: * EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
022: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
024: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
025: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026: * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
027: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
028: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
029: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: */package org.blojsom.plugin.nofollow;
031:
032: import org.apache.commons.logging.Log;
033: import org.apache.commons.logging.LogFactory;
034: import org.blojsom.blog.Blog;
035: import org.blojsom.blog.Comment;
036: import org.blojsom.blog.Entry;
037: import org.blojsom.blog.Trackback;
038: import org.blojsom.plugin.Plugin;
039: import org.blojsom.plugin.PluginException;
040: import org.blojsom.util.BlojsomUtils;
041:
042: import javax.servlet.http.HttpServletRequest;
043: import javax.servlet.http.HttpServletResponse;
044: import java.util.Iterator;
045: import java.util.Map;
046: import java.util.regex.Matcher;
047: import java.util.regex.Pattern;
048:
049: /**
050: * NoFollow plugin adds support for the rel="nofollow" attribute on links added
051: * to comments and trackbacks.
052: *
053: * @author David Czarnecki
054: * @version $Id: NoFollowPlugin.java,v 1.3 2007/01/17 02:35:12 czarneckid Exp $
055: * @since blojsom 3.0
056: */
057: public class NoFollowPlugin implements Plugin {
058:
059: private Log _logger = LogFactory.getLog(NoFollowPlugin.class);
060:
061: private static final String HYPERLINK_REGEX = "<a\\s([^>]*\\s*href\\s*=[^>]*)>";
062: private static final String ATTRIBUTE_REGEX = "[^=[\\p{Space}]]*\\s*=\\s*\"[^\"]*\"|[^=[\\p{Space}]]*\\s*=\\s*'[^']*'|[^=[\\p{Space}]]*\\s*=[^[\\p{Space}]]*";
063: private static final String NOFOLLOW_REGEX = "\\s*nofollow\\s*";
064: private static final String REL_ATTR_REGEX = "rel\\s*=";
065: private static final String REL_NOFOLLOW = " rel=\"nofollow\"";
066:
067: /**
068: * Construct a new instance of the NoFollow plugin
069: */
070: public NoFollowPlugin() {
071: }
072:
073: /**
074: * Initialize this plugin. This method only called when the plugin is instantiated.
075: *
076: * @throws org.blojsom.plugin.PluginException
077: * If there is an error initializing the plugin
078: */
079: public void init() throws PluginException {
080: }
081:
082: /**
083: * Take a string and add rel="nofollow" attributes to the <a href/> links
084: * if they are not already on the links.
085: *
086: * @param text Text to look for hyperlinks
087: * @return Text with rel="nofollow" attributes added to the hyperlinks
088: */
089: protected String noFollowFy(String text) {
090: if (BlojsomUtils.checkNullOrBlank(text)) {
091: return text;
092: }
093:
094: StringBuffer updatedText = new StringBuffer();
095:
096: Pattern hyperlinkPattern = Pattern.compile(HYPERLINK_REGEX,
097: Pattern.CASE_INSENSITIVE | Pattern.MULTILINE
098: | Pattern.UNICODE_CASE | Pattern.DOTALL);
099: Matcher hyperlinkMatcher = hyperlinkPattern.matcher(text);
100:
101: Pattern attributePattern = Pattern.compile(ATTRIBUTE_REGEX,
102: Pattern.CASE_INSENSITIVE | Pattern.MULTILINE
103: | Pattern.UNICODE_CASE | Pattern.DOTALL);
104: Pattern relAttrPattern = Pattern.compile(REL_ATTR_REGEX,
105: Pattern.CASE_INSENSITIVE | Pattern.MULTILINE
106: | Pattern.UNICODE_CASE | Pattern.DOTALL);
107: Pattern noFollow = Pattern.compile(NOFOLLOW_REGEX,
108: Pattern.CASE_INSENSITIVE | Pattern.MULTILINE
109: | Pattern.UNICODE_CASE | Pattern.DOTALL);
110:
111: Matcher noFollowMatcher;
112: Matcher attributeMatcher;
113:
114: int lastIndex = 0;
115: while (hyperlinkMatcher.find()) {
116: updatedText.append(text.substring(lastIndex,
117: hyperlinkMatcher.start()));
118: String link = hyperlinkMatcher.group();
119: attributeMatcher = attributePattern.matcher(link);
120:
121: StringBuffer updatedLink = new StringBuffer();
122: boolean shouldAddAttr = true;
123:
124: while (attributeMatcher.find()) {
125: String attr = attributeMatcher.group();
126:
127: Matcher relAttrMatcher = relAttrPattern.matcher(attr);
128: while (relAttrMatcher.find()) {
129: noFollowMatcher = noFollow.matcher(attr);
130: if (!noFollowMatcher.matches()) {
131: int indexOfQuote = attr.lastIndexOf("\"");
132: if (indexOfQuote != -1) {
133: attr = attr.substring(0, indexOfQuote)
134: + " nofollow\"";
135: shouldAddAttr = false;
136: }
137: }
138: }
139:
140: updatedLink.append(attr);
141: }
142:
143: if (shouldAddAttr) {
144: updatedLink.append(REL_NOFOLLOW);
145: }
146:
147: updatedLink.append((">"));
148: updatedText.append(updatedLink);
149: lastIndex = hyperlinkMatcher.end();
150: }
151:
152: updatedText.append(text.substring(lastIndex));
153:
154: return updatedText.toString();
155: }
156:
157: /**
158: * Process the blog entries
159: *
160: * @param httpServletRequest Request
161: * @param httpServletResponse Response
162: * @param blog {@link Blog} instance
163: * @param context Context
164: * @param entries Blog entries retrieved for the particular request
165: * @return Modified set of blog entries
166: * @throws PluginException If there is an error processing the blog entries
167: */
168: public Entry[] process(HttpServletRequest httpServletRequest,
169: HttpServletResponse httpServletResponse, Blog blog,
170: Map context, Entry[] entries) throws PluginException {
171: for (int i = 0; i < entries.length; i++) {
172: Entry entry = entries[i];
173:
174: Iterator commentsIterator = entry.getComments().iterator();
175: String commentText;
176: while (commentsIterator.hasNext()) {
177: Comment blogComment = (Comment) commentsIterator.next();
178:
179: commentText = blogComment.getComment();
180: commentText = noFollowFy(commentText);
181: blogComment.setComment(commentText);
182: }
183:
184: Iterator trackbacksIterator = entry.getTrackbacks()
185: .iterator();
186: String trackbackText;
187: while (trackbacksIterator.hasNext()) {
188: Trackback trackback = (Trackback) trackbacksIterator
189: .next();
190:
191: trackbackText = trackback.getExcerpt();
192: trackbackText = noFollowFy(trackbackText);
193: trackback.setExcerpt(trackbackText);
194: }
195: }
196:
197: return entries;
198: }
199:
200: /**
201: * Perform any cleanup for the plugin. Called after {@link #process}.
202: *
203: * @throws PluginException If there is an error performing cleanup for this plugin
204: */
205: public void cleanup() throws PluginException {
206: }
207:
208: /**
209: * Called when BlojsomServlet is taken out of service
210: *
211: * @throws PluginException If there is an error in finalizing this plugin
212: */
213: public void destroy() throws PluginException {
214: }
215: }
|