001: /*---------------------------------------------------------------------------*\
002: $Id: FeedMaxSummarySizePlugIn.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.plugins;
047:
048: import org.clapper.curn.CurnConfig;
049: import org.clapper.curn.CurnException;
050: import org.clapper.curn.FeedInfo;
051: import org.clapper.curn.FeedConfigItemPlugIn;
052: import org.clapper.curn.MainConfigItemPlugIn;
053: import org.clapper.curn.PostFeedParsePlugIn;
054: import org.clapper.curn.parser.RSSChannel;
055: import org.clapper.curn.parser.RSSItem;
056:
057: import org.clapper.util.classutil.ClassUtil;
058: import org.clapper.util.config.ConfigurationException;
059: import org.clapper.util.logging.Logger;
060: import org.clapper.util.html.HTMLUtil;
061:
062: import java.util.HashMap;
063: import java.util.Map;
064: import org.clapper.curn.FeedCache;
065:
066: /**
067: * The <tt>FeedMaxSummarySizePlugIn</tt> optionally truncates a feed's
068: * summary to a maximum number of characters, inserting an ellipsis at the
069: * end to indicate truncation. It truncates on a word boundary, if
070: * possible. This plug-in intercepts the * following configuration
071: * parameters:
072: *
073: * <table border="1">
074: * <tr valign="top" align="left">
075: * <th>Section</th>
076: * <th>Parameter</th>
077: * <th>Meaning</th>
078: * </tr>
079: * <tr valign="top">
080: * <td><tt>[curn]</tt></td>
081: * <td><tt>MaxSummarySize</tt></td>
082: * <td>The default (global) setting for all feeds. If not specified, the
083: * default value is, essentially, infinite.</td>
084: * </tr>
085: * <tr valign="top">
086: * <td><tt>[Feed<i>xxx</i>]</tt></td>
087: * <td><tt>MaxSummarySize</tt></td>
088: * <td>The setting for a specific feed. If not specified, the global
089: * default is used.
090: * </td>
091: * </tr>
092: * </table>
093: *
094: * @version <tt>$Revision: 7041 $</tt>
095: */
096: public class FeedMaxSummarySizePlugIn implements MainConfigItemPlugIn,
097: FeedConfigItemPlugIn, PostFeedParsePlugIn {
098: /*----------------------------------------------------------------------*\
099: Private Constants
100: \*----------------------------------------------------------------------*/
101:
102: private static final String VAR_MAX_SUMMARY_SIZE = "MaxSummarySize";
103: private static final int NO_MAX = Integer.MAX_VALUE;
104:
105: /*----------------------------------------------------------------------*\
106: Private Data Items
107: \*----------------------------------------------------------------------*/
108:
109: /**
110: * Feed save data, by feed
111: */
112: private Map<FeedInfo, Integer> perFeedMaxSummarySize = new HashMap<FeedInfo, Integer>();
113:
114: /**
115: * The global default
116: */
117: private int maxSummarySizeDefault = NO_MAX;
118:
119: /**
120: * For log messages
121: */
122: private static final Logger log = new Logger(
123: FeedMaxSummarySizePlugIn.class);
124:
125: /*----------------------------------------------------------------------*\
126: Constructor
127: \*----------------------------------------------------------------------*/
128:
129: /**
130: * Default constructor (required).
131: */
132: public FeedMaxSummarySizePlugIn() {
133: // Nothing to do
134: }
135:
136: /*----------------------------------------------------------------------*\
137: Public Methods Required by *PlugIn Interfaces
138: \*----------------------------------------------------------------------*/
139:
140: /**
141: * Get a displayable name for the plug-in. A return value of null can
142: * be used for "invisible" plug-ins.
143: *
144: * @return the name
145: */
146: public String getPlugInName() {
147: return "Feed Max Summary Size";
148: }
149:
150: /**
151: * Get the sort key for this plug-in.
152: *
153: * @return the sort key string.
154: */
155: public String getPlugInSortKey() {
156: return ClassUtil.getShortClassName(getClass().getName());
157: }
158:
159: /**
160: * Initialize the plug-in. This method is called before any of the
161: * plug-in methods are called.
162: *
163: * @throws CurnException on error
164: */
165: public void initPlugIn() throws CurnException {
166: }
167:
168: /**
169: * Called immediately after <i>curn</i> has read and processed a
170: * configuration item in the main [curn] configuration section. All
171: * configuration items are passed, one by one, to each loaded plug-in.
172: * If a plug-in class is not interested in a particular configuration
173: * item, this method should simply return without doing anything. Note
174: * that some configuration items may simply be variable assignment;
175: * there's no real way to distinguish a variable assignment from a
176: * blessed configuration item.
177: *
178: * @param sectionName the name of the configuration section where
179: * the item was found
180: * @param paramName the name of the parameter
181: * @param config the {@link CurnConfig} object
182: *
183: * @throws CurnException on error
184: *
185: * @see CurnConfig
186: */
187: public void runMainConfigItemPlugIn(String sectionName,
188: String paramName, CurnConfig config) throws CurnException {
189: try {
190: if (paramName.equals(VAR_MAX_SUMMARY_SIZE)) {
191: maxSummarySizeDefault = config
192: .getRequiredCardinalValue(sectionName,
193: paramName);
194: if (maxSummarySizeDefault == 0)
195: maxSummarySizeDefault = NO_MAX;
196: }
197: }
198:
199: catch (ConfigurationException ex) {
200: throw new CurnException(ex);
201: }
202: }
203:
204: /**
205: * Called immediately after <i>curn</i> has read and processed a
206: * configuration item in a "feed" configuration section. All
207: * configuration items are passed, one by one, to each loaded plug-in.
208: * If a plug-in class is not interested in a particular configuration
209: * item, this method should simply return without doing anything. Note
210: * that some configuration items may simply be variable assignment;
211: * there's no real way to distinguish a variable assignment from a
212: * blessed configuration item.
213: *
214: * @param sectionName the name of the configuration section where
215: * the item was found
216: * @param paramName the name of the parameter
217: * @param config the active configuration
218: * @param feedInfo partially complete <tt>FeedInfo</tt> object
219: * for the feed. The URL is guaranteed to be
220: * present, but no other fields are.
221: *
222: * @return <tt>true</tt> to continue processing the feed,
223: * <tt>false</tt> to skip it
224: *
225: * @throws CurnException on error
226: *
227: * @see CurnConfig
228: * @see FeedInfo
229: * @see FeedInfo#getURL
230: */
231: public boolean runFeedConfigItemPlugIn(String sectionName,
232: String paramName, CurnConfig config, FeedInfo feedInfo)
233: throws CurnException {
234: try {
235: if (paramName.equals(VAR_MAX_SUMMARY_SIZE)) {
236: int max = config.getRequiredCardinalValue(sectionName,
237: paramName);
238: if (max == 0)
239: max = NO_MAX;
240:
241: perFeedMaxSummarySize.put(feedInfo, max);
242: log.debug("[" + sectionName + "]: " + paramName + "="
243: + max);
244: }
245:
246: return true;
247: }
248:
249: catch (ConfigurationException ex) {
250: throw new CurnException(ex);
251: }
252: }
253:
254: /**
255: * Called immediately after a feed is parsed, but before it is
256: * otherwise processed. This method can return <tt>false</tt> to signal
257: * <i>curn</i> that the feed should be skipped. For instance, a plug-in
258: * that filters on the parsed feed data could use this method to weed
259: * out non-matching feeds before they are downloaded. Similarly, a
260: * plug-in that edits the parsed data (removing or editing individual
261: * items, for instance) could use method to do so.
262: *
263: * @param feedInfo the {@link FeedInfo} object for the feed that
264: * has been downloaded and parsed.
265: * @param feedCache the feed cache
266: * @param channel the parsed channel data
267: *
268: * @return <tt>true</tt> if <i>curn</i> should continue to process the
269: * feed, <tt>false</tt> to skip the feed. A return value of
270: * <tt>false</tt> aborts all further processing on the feed.
271: * In particular, <i>curn</i> will not pass the feed along to
272: * other plug-ins that have yet to be notified of this event.
273: *
274: * @throws CurnException on error
275: *
276: * @see RSSChannel
277: * @see FeedInfo
278: */
279: public boolean runPostFeedParsePlugIn(FeedInfo feedInfo,
280: FeedCache feedCache, RSSChannel channel)
281: throws CurnException {
282: Integer maxBoxed = perFeedMaxSummarySize.get(feedInfo);
283: int max = maxSummarySizeDefault;
284:
285: if (maxBoxed != null)
286: max = maxBoxed;
287:
288: if (max != NO_MAX) {
289: log.debug("Truncating all item summaries to " + max
290: + " characters for feed \""
291: + feedInfo.getURL().toString() + "\"");
292: for (RSSItem item : channel.getItems()) {
293: String summary = item.getSummary();
294: if (summary != null)
295: item.setSummary(truncateSummary(summary, max));
296: }
297: }
298:
299: return true;
300: }
301:
302: /*----------------------------------------------------------------------*\
303: Private Methods
304: \*----------------------------------------------------------------------*/
305:
306: /**
307: * Truncate an RSS item's summary to a specified size. Truncates on
308: * word boundary, if possible.
309: *
310: * @param summary the summary to truncate
311: * @param maxSize the maximum size
312: *
313: * @return the truncated summary
314: */
315: private String truncateSummary(String summary, int maxSize) {
316: // Can't truncate HTML right now...
317: summary = HTMLUtil.textFromHTML(summary.trim());
318:
319: if (summary.length() > maxSize) {
320: // Allow for ellipsis
321:
322: if (maxSize < 4)
323: maxSize = 4;
324:
325: maxSize -= 4;
326:
327: int last = maxSize;
328: char[] ch = summary.toCharArray();
329: int i = last;
330:
331: // If we're in the middle of a word, find the first hunk of
332: // white space.
333:
334: while ((!Character.isWhitespace(ch[i])) && (i-- >= 0))
335: continue;
336:
337: // Next, get rid of trailing white space.
338:
339: while ((Character.isWhitespace(ch[i])) && (i-- >= 0))
340: continue;
341:
342: // Handle underflow.
343:
344: if (i >= 0)
345: last = i;
346:
347: StringBuilder buf = new StringBuilder(summary.substring(0,
348: last + 1));
349: buf.append(" ...");
350: summary = buf.toString();
351: }
352:
353: return summary;
354: }
355: }
|