001: /*---------------------------------------------------------------------------*\
002: $Id: RetainArticlesPlugIn.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.plugins;
047:
import java.net.URL;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.clapper.curn.CurnConfig;
import org.clapper.curn.CurnException;
import org.clapper.curn.CurnUtil;
import org.clapper.curn.FeedCache;
import org.clapper.curn.FeedCacheEntry;
import org.clapper.curn.FeedConfigItemPlugIn;
import org.clapper.curn.FeedInfo;
import org.clapper.curn.ForceFeedDownloadPlugIn;
import org.clapper.curn.MainConfigItemPlugIn;
import org.clapper.curn.PostFeedParsePlugIn;
import org.clapper.curn.parser.RSSChannel;
import org.clapper.curn.parser.RSSItem;
import org.clapper.util.classutil.ClassUtil;
import org.clapper.util.config.ConfigurationException;
import org.clapper.util.logging.Logger;
import org.clapper.util.text.Duration;
069:
070: /**
071: * The <tt>RetainArticlesPlugIn</tt> can be used to force articles in a feed
072: * (or in all feeds) to be displayed more than once. It looks for a default
073: * (main-configuration section) "ShowArticlesFor" parameter, and it permits
074: * a per-feed "ShowArticlesFor" parameter to override the default. The
075: * configuration parameter takes a time interval, expressed in a
076: * natural language string. (The {@link IgnoreOldArticlesPlugIn} class uses
077: * the same time interval form.) Examples:
078: *
079: * <ul>
080: * <li> 3 days
081: * <li> 1 week
082: * <li> 365 days
083: * <li> 12 hours, 30 minutes
084: * </ul>
085: *
086: * Valid interval names (in English) are:
087: *
088: * <ul>
089: * <li> "millisecond", "milliseconds", "ms"
090: * <li> "second", "seconds", "sec", "secs"
091: * <li> "minutes", "minutes", "min", "mins"
092: * <li> "hour", "hours", "hr", "hrs"
093: * <li> "day", "days"
094: * <li> "week", "weeks"
095: * </ul>
096: *
097: * <p>This plug-in uses the
098: * <a href="http://www.clapper.org/software/java/util/">org.clapper.util</a>
099: * library's
100: * <a href="http://www.clapper.org/software/java/util/javadocs/util/api/org/clapper/util/misc/Duration.html"><tt>Duration</tt></a>
101: * class to parse the age/duration values. See that class for more details.</p>
102: *
103: *
104: * <p>This plug-in intercepts the following configuration parameters.</p>
105: *
106: * <table border="1">
107: * <tr valign="top" align="left">
108: * <th>Section</th>
109: * <th>Parameter</th>
110: * <th>Meaning</th>
111: * <th>Default</th>
112: * </tr>
113: * <tr valign="top">
114: * <td><tt>[curn]</tt></td>
115: * <td><tt>ShowArticlesFor</tt></td>
116: * <td>Global default specifying how long to retain an article. Applies to
117: * all feeds that don't explicitly override this parameter.</td>
118: * <td>None. (Articles displayed only once.)</td>
119: * </tr>
120: * <tr valign="top">
121: * <td><tt>[Feed<i>xxx</i>]</tt></td>
122: * <td><tt>IgnoreArticlesOlderThan</tt></td>
123: * <td>Per-feed parameter specifying how long to retain an article.</td>
124: * <td>The global <tt>IgnoreArticlesOlderThan</tt> setting. If there is
125: * no global setting, then the default is to display articles only
126: * once.</td>
127: * </tr>
128: * </table>
129: *
130: * <p><b>WARNING</b>: Beware of interactions with the
131: * {@link IgnoreOldArticlesPlugIn} class. For instance, if you use
132: * "ShowArticlesFor" to show articles for 5 days, but you also use
133: * "IgnoreArticlesOlderThan" to discard articles older than 2 days,
134: * the "IgnoreArticlesOlderThan parameter takes precedence.
135: *
136: * @version <tt>$Revision: 7041 $</tt>
137: */
138: public class RetainArticlesPlugIn implements MainConfigItemPlugIn,
139: FeedConfigItemPlugIn, ForceFeedDownloadPlugIn,
140: PostFeedParsePlugIn {
141: /*----------------------------------------------------------------------*\
142: Private Constants
143: \*----------------------------------------------------------------------*/
144:
145: private static final String VAR_SHOW_ARTICLES_DURATION = "ShowArticlesFor";
146:
147: /*----------------------------------------------------------------------*\
148: Private Data Items
149: \*----------------------------------------------------------------------*/
150:
151: /**
152: * Feed duration data, by feed URL. This map contains configuration data.
153: */
154: private Map<URL, Duration> perFeedDuration = new HashMap<URL, Duration>();
155:
156: /**
157: * The global default
158: */
159: private Duration globalDefault = null;
160:
161: /**
162: * For logging
163: */
164: private static final Logger log = new Logger(
165: RetainArticlesPlugIn.class);
166:
167: /*----------------------------------------------------------------------*\
168: Constructor
169: \*----------------------------------------------------------------------*/
170:
171: /**
172: * Creates a new instance of <tt>RetainArticlesPlugIn</tt>
173: */
174: public RetainArticlesPlugIn() {
175: }
176:
177: /*----------------------------------------------------------------------*\
178: Public Methods
179: \*----------------------------------------------------------------------*/
180:
181: /**
182: * Get a displayable name for the plug-in.
183: *
184: * @return the name
185: */
186: public String getPlugInName() {
187: return "Retain Articles";
188: }
189:
190: /**
191: * Get the sort key for this plug-in.
192: *
193: * @return the sort key string.
194: */
195: public String getPlugInSortKey() {
196: return ClassUtil.getShortClassName(getClass().getName());
197: }
198:
199: /**
200: * Initialize the plug-in. This method is called before any of the
201: * plug-in methods are called.
202: *
203: * @throws CurnException on error
204: */
205: public void initPlugIn() throws CurnException {
206: }
207:
208: /**
209: * Called immediately after <i>curn</i> has read and processed a
210: * configuration item in the main [curn] configuration section. All
211: * configuration items are passed, one by one, to each loaded plug-in.
212: * If a plug-in class is not interested in a particular configuration
213: * item, this method should simply return without doing anything. Note
214: * that some configuration items may simply be variable assignment;
215: * there's no real way to distinguish a variable assignment from a
216: * blessed configuration item.
217: *
218: * @param sectionName the name of the configuration section where
219: * the item was found
220: * @param paramName the name of the parameter
221: * @param config the {@link CurnConfig} object
222: *
223: * @throws CurnException on error
224: *
225: * @see CurnConfig
226: */
227: public void runMainConfigItemPlugIn(String sectionName,
228: String paramName, CurnConfig config) throws CurnException {
229: try {
230: if (paramName.equals(VAR_SHOW_ARTICLES_DURATION)) {
231: try {
232: String sDuration = config.getConfigurationValue(
233: sectionName, paramName);
234: globalDefault = new Duration(sDuration);
235: log.debug("[" + sectionName + "] " + paramName
236: + "=" + globalDefault);
237: }
238:
239: catch (ParseException ex) {
240: throw new CurnException(
241: "Bad value for configuration "
242: + "variable \"" + paramName + "\" "
243: + "in section [" + sectionName
244: + "]", ex);
245: }
246:
247: }
248: }
249:
250: catch (ConfigurationException ex) {
251: throw new CurnException(ex);
252: }
253: }
254:
255: /**
256: * Called immediately after <i>curn</i> has read and processed a
257: * configuration item in a "feed" configuration section. All
258: * configuration items are passed, one by one, to each loaded plug-in.
259: * If a plug-in class is not interested in a particular configuration
260: * item, this method should simply return without doing anything. Note
261: * that some configuration items may simply be variable assignment;
262: * there's no real way to distinguish a variable assignment from a
263: * blessed configuration item.
264: *
265: * @param sectionName the name of the configuration section where
266: * the item was found
267: * @param paramName the name of the parameter
268: * @param config the active configuration
269: * @param feedInfo partially complete <tt>FeedInfo</tt> object
270: * for the feed. The URL is guaranteed to be
271: * present, but no other fields are.
272: *
273: * @return <tt>true</tt> to continue processing the feed,
274: * <tt>false</tt> to skip it
275: *
276: * @throws CurnException on error
277: *
278: * @see CurnConfig
279: * @see FeedInfo
280: * @see FeedInfo#getURL
281: */
282: public boolean runFeedConfigItemPlugIn(String sectionName,
283: String paramName, CurnConfig config, FeedInfo feedInfo)
284: throws CurnException {
285: try {
286: if (paramName.equals(VAR_SHOW_ARTICLES_DURATION)) {
287: try {
288: String sDuration = config.getConfigurationValue(
289: sectionName, paramName);
290: Duration duration = new Duration(sDuration);
291: URL feedURL = CurnUtil.normalizeURL(feedInfo
292: .getURL());
293: perFeedDuration.put(feedURL, duration);
294: if (log.isDebugEnabled()) {
295: log.debug("[" + sectionName + "] ("
296: + feedURL.toString() + ") " + paramName
297: + "=" + duration + " ("
298: + duration.format() + ")");
299: }
300: }
301:
302: catch (ParseException ex) {
303: log.error(ex);
304: throw new CurnException(
305: "Bad value for configuration "
306: + "variable \"" + paramName + "\" "
307: + "in section [" + sectionName
308: + "]", ex);
309: }
310: }
311: }
312:
313: catch (ConfigurationException ex) {
314: throw new CurnException(ex);
315: }
316:
317: return true;
318: }
319:
320: /**
321: * This method determines (based on some internal criteria) whether
322: * a given feed should be downloaded even if it hasn't changed. If multiple
323: * plug-ins implement this interface, then only one needs to return
324: * <tt>true</tt> for the feed download to be forced.
325: *
326: * @param feedInfo the {@link FeedInfo} object for the feed that
327: * has been downloaded and parsed.
328: * @param feedCache the feed cache, or null if there isn't one
329: *
330: * @return <tt>true</tt> if the feed should be downloaded and parsed
331: * even if it's not out of date; <tt>false</tt> if <i>curn</i>'s
332: * normal downloading rules should apply.
333: *
334: * @throws CurnException on error
335: */
336: public boolean forceFeedDownload(FeedInfo feedInfo,
337: FeedCache feedCache) throws CurnException {
338: URL feedURL = CurnUtil.normalizeURL(feedInfo.getURL());
339: Duration duration = perFeedDuration.get(feedURL);
340: if (duration == null)
341: duration = globalDefault;
342:
343: return (duration != null);
344: }
345:
346: /**
347: * Called immediately after a feed is parsed, but before it is
348: * otherwise processed. This method can return <tt>false</tt> to signal
349: * <i>curn</i> that the feed should be skipped. For instance, a plug-in
350: * that filters on the parsed feed data could use this method to weed
351: * out non-matching feeds before they are downloaded. Similarly, a
352: * plug-in that edits the parsed data (removing or editing individual
353: * items, for instance) could use method to do so.
354: *
355: * @param feedInfo the {@link FeedInfo} object for the feed that
356: * has been downloaded and parsed.
357: * @param feedCache the feed cache
358: * @param channel the parsed channel data
359: *
360: * @return <tt>true</tt> if <i>curn</i> should continue to process the
361: * feed, <tt>false</tt> to skip the feed. A return value of
362: * <tt>false</tt> aborts all further processing on the feed.
363: * In particular, <i>curn</i> will not pass the feed along to
364: * other plug-ins that have yet to be notified of this event.
365: *
366: * @throws CurnException on error
367: *
368: * @see RSSChannel
369: * @see FeedInfo
370: */
371: public boolean runPostFeedParsePlugIn(FeedInfo feedInfo,
372: FeedCache feedCache, RSSChannel channel)
373: throws CurnException {
374: URL feedURL = CurnUtil.normalizeURL(feedInfo.getURL());
375: log
376: .debug("Checking parsed feed \"" + feedURL.toString()
377: + "\"");
378: Duration duration = perFeedDuration.get(feedURL);
379: if (duration == null)
380: duration = globalDefault;
381:
382: if (duration != null) {
383: String feedURLString = feedURL.toString();
384: String sDuration = duration.format();
385: long durationMillis = duration.getDuration();
386:
387: log.debug("Articles in feed " + feedURL
388: + " should be shown for " + sDuration);
389:
390: long now = System.currentTimeMillis();
391: for (RSSItem item : channel.getItems()) {
392: FeedCacheEntry entry = null;
393: long itemCacheTime = now;
394: if (feedCache != null) {
395: entry = feedCache.getEntryForItem(item);
396: if (entry != null)
397: itemCacheTime = entry.getTimestamp();
398: }
399:
400: long itemAge = now - itemCacheTime;
401:
402: // Account for articles dated in the future. (There's no
403: // reason some doofus feed couldn't do that. And then there's
404: // always machine clock-skew.)
405:
406: if (itemAge < 0)
407: itemAge = 0;
408:
409: // Has the item passed the duration to be shown?
410:
411: Date cacheDate = new Date(itemCacheTime);
412: if (itemAge > durationMillis) {
413: log
414: .info("In feed " + feedURLString
415: + ", article " + item.getURL()
416: + " was cached " + cacheDate
417: + ", which is more than "
418: + sDuration + ". "
419: + "Suppressing article.");
420: channel.removeItem(item);
421: }
422:
423: else if (feedCache == null) {
424: log.info("No cache. Retaining article by default.");
425: }
426:
427: else {
428: entry = feedCache.getEntryForItem(item);
429: if (entry != null) {
430: log.info("In feed " + feedURLString
431: + ", previously seen article "
432: + item.getURL() + " was cached "
433: + cacheDate + ", which is less than "
434: + sDuration + ". "
435: + "Showing article again.");
436: entry.setSticky(true);
437: }
438: }
439: }
440: }
441:
442: return true;
443: }
444:
445: /*----------------------------------------------------------------------*\
446: Protected Methods
447: \*----------------------------------------------------------------------*/
448:
449: /*----------------------------------------------------------------------*\
450: Private Methods
451: \*----------------------------------------------------------------------*/
452: }
|