001: /**
002: * Copyright (c) 2003-2007, David A. Czarnecki
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * Redistributions of source code must retain the above copyright notice, this list of conditions and the
009: * following disclaimer.
010: * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
011: * following disclaimer in the documentation and/or other materials provided with the distribution.
012: * Neither the name of "David A. Czarnecki" and "blojsom" nor the names of its contributors may be used to
013: * endorse or promote products derived from this software without specific prior written permission.
014: * Products derived from this software may not be called "blojsom", nor may "blojsom" appear in their name,
015: * without prior written permission of David A. Czarnecki.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
018: * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
019: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
020: * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
021: * EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
022: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
024: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
025: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026: * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
027: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
028: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
029: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: */package org.blojsom.plugin.highlight;
031:
032: import org.blojsom.blog.Blog;
033: import org.blojsom.blog.Entry;
034: import org.blojsom.plugin.Plugin;
035: import org.blojsom.plugin.PluginException;
036:
037: import javax.servlet.http.HttpServletRequest;
038: import javax.servlet.http.HttpServletResponse;
039: import java.util.Map;
040: import java.util.StringTokenizer;
041: import java.util.regex.Matcher;
042: import java.util.regex.Pattern;
043:
044: /**
045: * The GoogleHighlightPlugin will highlight words on your blog if the referer came from a Google
046: * query.
047: * <p/>
048: * Based on work from http://www.textism.com/
049: *
050: * @author Mark Lussier
051: * @version $Id: GoogleHighlightPlugin.java,v 1.3 2007/01/17 02:35:10 czarneckid Exp $
052: * @since blojsom 3.0
053: */
054: public class GoogleHighlightPlugin implements Plugin {
055:
056: /**
057: * HTTP Header for Referer Information
058: */
059: private static final String HEADER_REFERER = "referer";
060:
061: private static final String START_BOUNDRY = "(\\b";
062: private static final String END_BOUNDRY = "\\b)";
063:
064: /**
065: * Expression used to identify the referer as a Google referer
066: */
067: private static final String EXPRESSSION_GOOGLE = "^http:\\/\\/w?w?w?\\.?google.*";
068:
069: private static final String EXPRESSION_HTMLPREFIX = "(?<=>)([^<]+)?";
070: private static final String EXPRESSION_HASTAGS = "<.+>";
071:
072: /**
073: * Expression used to extract the Query string portion of the referer
074: */
075: private static final String GOOGLE_QUERY = "^.*q=([^&]+)&?.*$";
076:
077: /**
078: * Expression used to clean quotes
079: */
080: private static final String GOOGLE_CLEANQUOTES = "'/\'|\"/\"";
081:
082: /**
083: * Used to replace matches in entries that DO NOT have html tags
084: */
085: private static final String HIGHLIGHT_PLAINTEXT = "<span class=\"searchhighlight\">$1</span>";
086:
087: /**
088: * Used to replace matches in entries that HAVE html tags
089: */
090: private static final String HIGHLIGHT_HTML = "$1<span class=\"searchhighlight\">$2</span>";
091:
092: /**
093: * Initialize this plugin. This method only called when the plugin is instantiated.
094: *
095: * @throws PluginException If there is an error initializing the plugin
096: */
097: public void init() throws PluginException {
098: }
099:
100: /**
101: * Extract search tokens from the Google Query String
102: *
103: * @param referer The Google referer
104: * @return A string array of search words or <code>null</code> if no search query match is found
105: */
106: private String[] extractQueryTokens(String referer) {
107: String[] result = null;
108: Matcher matcher = Pattern.compile(GOOGLE_QUERY,
109: Pattern.CASE_INSENSITIVE).matcher(referer);
110: if (matcher.find()) {
111: String _query = matcher.group(1);
112: _query = _query.replaceAll(GOOGLE_CLEANQUOTES, "");
113: StringTokenizer _st = new StringTokenizer(_query, "+, .",
114: false);
115: result = new String[_st.countTokens()];
116: int cnt = 0;
117: while (_st.hasMoreElements()) {
118: result[cnt] = _st.nextToken();
119: cnt += 1;
120: }
121: }
122:
123: return result;
124: }
125:
126: /**
127: * Process the blog entries
128: *
129: * @param httpServletRequest Request
130: * @param httpServletResponse Response
131: * @param blog {@link Blog} instance
132: * @param context Context
133: * @param entries Blog entries retrieved for the particular request
134: * @return Modified set of blog entries
135: * @throws PluginException If there is an error processing the blog entries
136: */
137: public Entry[] process(HttpServletRequest httpServletRequest,
138: HttpServletResponse httpServletResponse, Blog blog,
139: Map context, Entry[] entries) throws PluginException {
140: String referer = httpServletRequest.getHeader(HEADER_REFERER);
141:
142: if (referer != null && referer.matches(EXPRESSSION_GOOGLE)) {
143: String[] searchwords = extractQueryTokens(referer);
144:
145: if (searchwords != null) {
146: Pattern hasTags = Pattern.compile(EXPRESSION_HASTAGS);
147:
148: for (int x = 0; x < entries.length; x++) {
149: Entry entry = entries[x];
150: Matcher matcher = hasTags.matcher(entry
151: .getDescription());
152: boolean isHtml = matcher.find();
153: for (int y = 0; y < searchwords.length; y++) {
154: String word = searchwords[y];
155: if (!isHtml) {
156: entry.setDescription(entry.getDescription()
157: .replaceAll(
158: START_BOUNDRY + word
159: + END_BOUNDRY,
160: HIGHLIGHT_PLAINTEXT));
161: } else {
162: entry.setDescription(entry.getDescription()
163: .replaceAll(
164: EXPRESSION_HTMLPREFIX
165: + START_BOUNDRY
166: + word
167: + END_BOUNDRY,
168: HIGHLIGHT_HTML));
169: }
170: }
171: }
172: }
173: }
174:
175: return entries;
176: }
177:
178: /**
179: * Perform any cleanup for the plugin. Called after {@link #process}.
180: *
181: * @throws PluginException If there is an error performing cleanup for this plugin
182: */
183: public void cleanup() throws PluginException {
184: }
185:
186: /**
187: * Called when BlojsomServlet is taken out of service
188: *
189: * @throws PluginException If there is an error in finalizing this plugin
190: */
191: public void destroy() throws PluginException {
192: }
193: }
|