001: /**
002: * Copyright (c) 2003-2007, David A. Czarnecki
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * Redistributions of source code must retain the above copyright notice, this list of conditions and the
009: * following disclaimer.
010: * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
011: * following disclaimer in the documentation and/or other materials provided with the distribution.
012: * Neither the name of "David A. Czarnecki" and "blojsom" nor the names of its contributors may be used to
013: * endorse or promote products derived from this software without specific prior written permission.
014: * Products derived from this software may not be called "blojsom", nor may "blojsom" appear in their name,
015: * without prior written permission of David A. Czarnecki.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
018: * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
019: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
020: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
021: * EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
022: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
024: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
025: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026: * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
027: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
028: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
029: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: */package org.blojsom.plugin.trackback;
031:
032: import org.apache.commons.logging.Log;
033: import org.apache.commons.logging.LogFactory;
034: import org.blojsom.blog.Blog;
035: import org.blojsom.blog.Entry;
036: import org.blojsom.plugin.Plugin;
037: import org.blojsom.plugin.PluginException;
038: import org.blojsom.util.BlojsomConstants;
039:
040: import javax.servlet.http.HttpServletRequest;
041: import javax.servlet.http.HttpServletResponse;
042: import java.io.BufferedReader;
043: import java.io.IOException;
044: import java.io.InputStreamReader;
045: import java.net.HttpURLConnection;
046: import java.net.URL;
047: import java.net.URLEncoder;
048: import java.util.Map;
049: import java.util.regex.Matcher;
050: import java.util.regex.Pattern;
051:
052: /**
053: * AutoTrackbackPlugin
054: *
055: * @author David Czarnecki
056: * @since blojsom 3.0
057: * @version $Id: AutoTrackbackPlugin.java,v 1.3 2007/01/17 02:35:14 czarneckid Exp $
058: */
059: public class AutoTrackbackPlugin implements Plugin {
060:
061: private Log _logger = LogFactory.getLog(AutoTrackbackPlugin.class);
062:
063: private static final int REGEX_OPTIONS = Pattern.DOTALL
064: | Pattern.MULTILINE | Pattern.CASE_INSENSITIVE;
065: private static final Pattern RDF_OUTER_PATTERN = Pattern.compile(
066: "(<rdf:RDF.*?</rdf:RDF>).*?", REGEX_OPTIONS);
067: private static final Pattern RDF_INNER_PATTERN = Pattern.compile(
068: "(<rdf:Description.*/>)", REGEX_OPTIONS);
069: private static final Pattern DC_IDENTIFIER_PATTERN = Pattern
070: .compile("dc:identifier=\"(.*)\"");
071: private static final Pattern TRACKBACK_PING_PATTERN = Pattern
072: .compile("trackback:ping=\"(.*)\"");
073: private static final Pattern HREF_PATTERN = Pattern.compile(
074: "<\\s*a.*?href\\s*=\\s*\"(([^\"]+).*?)\"\\s*>",
075: REGEX_OPTIONS);
076:
077: /**
078: * Initialize this plugin. This method only called when the plugin is instantiated.
079: *
080: * @throws org.blojsom.plugin.PluginException If there is an error initializing the plugin
081: */
082: public void init() throws PluginException {
083: }
084:
085: /**
086: * Perform the trackback autodiscovery process
087: *
088: * @param blog Blog information
089: * @param blogEntry Blog entry
090: */
091: private void trackbackAutodiscovery(Blog blog, Entry blogEntry) {
092: try {
093: // Build the URL parameters for the trackback ping URL
094: StringBuffer trackbackPingURLParameters = new StringBuffer();
095: trackbackPingURLParameters.append("&").append(
096: TrackbackPlugin.TRACKBACK_URL_PARAM).append("=")
097: .append(blogEntry.getId());
098: trackbackPingURLParameters.append("&").append(
099: TrackbackPlugin.TRACKBACK_TITLE_PARAM).append("=")
100: .append(
101: URLEncoder.encode(blogEntry.getTitle(),
102: BlojsomConstants.UTF8));
103: trackbackPingURLParameters.append("&").append(
104: TrackbackPlugin.TRACKBACK_BLOG_NAME_PARAM).append(
105: "=").append(
106: URLEncoder.encode(blog.getBlogName(),
107: BlojsomConstants.UTF8));
108:
109: String excerpt = blogEntry.getDescription().replaceAll(
110: "<.*?>", "");
111: if (excerpt.length() > 255) {
112: excerpt = excerpt.substring(0, 251);
113: excerpt += "...";
114: }
115: trackbackPingURLParameters.append("&").append(
116: TrackbackPlugin.TRACKBACK_EXCERPT_PARAM)
117: .append("=").append(
118: URLEncoder.encode(excerpt,
119: BlojsomConstants.UTF8));
120:
121: // Extract all the HREF links from the blog description
122: Matcher hrefMatcher = HREF_PATTERN.matcher(blogEntry
123: .getDescription());
124: while (hrefMatcher.find()) {
125:
126: // If we have a group count of 2, the inner group will be the http:// reference
127: // Read the entire contents of the URL into a buffer
128: if (hrefMatcher.groupCount() == 2) {
129: String hyperlink = hrefMatcher.group(1);
130: if (_logger.isDebugEnabled()) {
131: _logger.debug("Found hyperlink: " + hyperlink);
132: }
133: BufferedReader br;
134: URL hyperlinkURL = new URL(hyperlink);
135: br = new BufferedReader(new InputStreamReader(
136: hyperlinkURL.openStream()));
137: String html;
138: StringBuffer contents = new StringBuffer();
139: while ((html = br.readLine()) != null) {
140: contents.append(html).append("\n");
141: }
142:
143: // Look for the Auto Trackback RDF in the HTML
144: Matcher rdfOuterMatcher = RDF_OUTER_PATTERN
145: .matcher(contents.toString());
146: while (rdfOuterMatcher.find()) {
147: if (_logger.isDebugEnabled()) {
148: _logger
149: .debug("Found outer RDF text in hyperlink");
150: }
151: for (int i = 0; i < rdfOuterMatcher
152: .groupCount(); i++) {
153: String outerRdfText = rdfOuterMatcher
154: .group(i);
155:
156: // Look for the inner RDF description
157: Matcher rdfInnerMatcher = RDF_INNER_PATTERN
158: .matcher(outerRdfText);
159: while (rdfInnerMatcher.find()) {
160: if (_logger.isDebugEnabled()) {
161: _logger
162: .debug("Found inner RDF text in hyperlink");
163: }
164: for (int j = 0; j < rdfInnerMatcher
165: .groupCount(); j++) {
166: String innerRdfText = rdfInnerMatcher
167: .group(j);
168:
169: // Look for a dc:identifier attribute which matches the current hyperlink
170: Matcher dcIdentifierMatcher = DC_IDENTIFIER_PATTERN
171: .matcher(innerRdfText);
172: if (dcIdentifierMatcher.find()) {
173: String dcIdentifier = dcIdentifierMatcher
174: .group(1);
175:
176: // If we find a match, send a trackback ping to the
177: if (dcIdentifier
178: .equals(hyperlink)) {
179: if (_logger
180: .isDebugEnabled()) {
181: _logger
182: .debug("Matched dc:identifier to hyperlink");
183: }
184: Matcher trackbackPingMatcher = TRACKBACK_PING_PATTERN
185: .matcher(innerRdfText);
186: if (trackbackPingMatcher
187: .find()) {
188: StringBuffer trackbackPingURL = new StringBuffer(
189: trackbackPingMatcher
190: .group(1));
191:
192: if (_logger
193: .isDebugEnabled()) {
194: _logger
195: .debug("Automatically sending trackback ping to URL: "
196: + trackbackPingURL
197: .toString());
198: }
199: URL trackbackUrl = new URL(
200: trackbackPingURL
201: .toString());
202:
203: // Open a connection to the trackback URL and read its input
204: HttpURLConnection trackbackUrlConnection = (HttpURLConnection) trackbackUrl
205: .openConnection();
206: trackbackUrlConnection
207: .setRequestMethod("POST");
208: trackbackUrlConnection
209: .setRequestProperty(
210: "Content-Encoding",
211: BlojsomConstants.UTF8);
212: trackbackUrlConnection
213: .setRequestProperty(
214: "Content-Type",
215: "application/x-www-form-urlencoded");
216: trackbackUrlConnection
217: .setRequestProperty(
218: "Content-Length",
219: ""
220: + trackbackPingURLParameters
221: .length());
222: trackbackUrlConnection
223: .setDoOutput(true);
224: trackbackUrlConnection
225: .getOutputStream()
226: .write(
227: trackbackPingURLParameters
228: .toString()
229: .getBytes(
230: BlojsomConstants.UTF8));
231: trackbackUrlConnection
232: .connect();
233: BufferedReader trackbackStatus = new BufferedReader(
234: new InputStreamReader(
235: trackbackUrlConnection
236: .getInputStream()));
237: String line;
238: StringBuffer status = new StringBuffer();
239: while ((line = trackbackStatus
240: .readLine()) != null) {
241: status
242: .append(
243: line)
244: .append(
245: "\n");
246: }
247: }
248: }
249: }
250: }
251: }
252: }
253: }
254: }
255: }
256: } catch (IOException e) {
257: if (_logger.isErrorEnabled()) {
258: _logger.error(e);
259: }
260: }
261: }
262:
263: /**
264: * Process the blog entries
265: *
266: * @param httpServletRequest Request
267: * @param httpServletResponse Response
268: * @param blog {@link Blog} instance
269: * @param context Context
270: * @param entries Blog entries retrieved for the particular request
271: * @return Modified set of blog entries
272: * @throws PluginException If there is an error processing the blog entries
273: */
274: public Entry[] process(HttpServletRequest httpServletRequest,
275: HttpServletResponse httpServletResponse, Blog blog,
276: Map context, Entry[] entries) throws PluginException {
277: for (int i = 0; i < entries.length; i++) {
278: Entry entry = entries[i];
279: if (entry.getMetaData() != null) {
280: Map entryMetaData = entry.getMetaData();
281: if (entryMetaData.containsKey("auto-trackback")
282: && !entryMetaData
283: .containsKey("auto-trackback-complete")) {
284: trackbackAutodiscovery(blog, entry);
285: entryMetaData
286: .put("auto-trackback-complete", "true");
287: }
288: } else {
289: _logger.debug("Skipping blog entry for autotrackback: "
290: + entry.getId());
291: }
292: }
293:
294: return entries;
295: }
296:
297: /**
298: * Perform any cleanup for the plugin. Called after {@link #process}.
299: *
300: * @throws PluginException If there is an error performing cleanup for this plugin
301: */
302: public void cleanup() throws PluginException {
303: }
304:
305: /**
306: * Called when BlojsomServlet is taken out of service
307: *
308: * @throws PluginException If there is an error in finalizing this plugin
309: */
310: public void destroy() throws PluginException {
311: }
312: }
|