001: /*---------------------------------------------------------------------------*\
002: $Id: EmptyArticleSummaryPlugIn.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.plugins;
047:
import org.clapper.curn.CurnConfig;
import org.clapper.curn.CurnException;
import org.clapper.curn.CurnUtil;
import org.clapper.curn.FeedCache;
import org.clapper.curn.FeedConfigItemPlugIn;
import org.clapper.curn.FeedInfo;
import org.clapper.curn.MainConfigItemPlugIn;
import org.clapper.curn.PostFeedParsePlugIn;
import org.clapper.curn.parser.RSSChannel;
import org.clapper.curn.parser.RSSItem;

import org.clapper.util.classutil.ClassUtil;
import org.clapper.util.config.ConfigurationException;
import org.clapper.util.logging.Logger;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
065:
066: /**
067: * The <tt>EmptyArticleSummaryPlugIn</tt> provides a way to handle an empty
068: * summary. It intercepts the following configuration parameters:
069: *
070: * <table border="1">
071: * <tr valign="top" align="left">
072: * <th>Section</th>
073: * <th>Parameter</th>
074: * <th>Meaning</th>
075: * </tr>
076: * <tr valign="top">
077: * <td><tt>[curn]</tt></td>
078: * <td><tt>SummaryOnly</tt></td>
079: * <td>DEPRECATED. Equivalent to
080: * <tt>ReplaceEmptySummaryWith: nothing</tt></td>
081: * </tr>
082: * <tr valign="top">
083: * <td><tt>[curn]</tt></td>
084: * <td><tt>ReplaceEmptySummaryWith</tt></td>
085: * <td>How to handle an empty summary field. Possible values:
086: * <ul>
087: * <li> <tt>nothing</tt>: do nothing
088: * <li> <tt>content</tt>: use the content, if any
089: * </ul>
090: * This setting defines the default value for feeds that
091: * don't specify their own <tt>ReplaceEmptySummaryWith</tt>
092: * parameter. If not specified, it defaults to <tt>content</tt>.</td>
093: * </tr>
094: * <tr valign="top">
095: * <td><tt>[Feed<i>xxx</i>]</tt></td>
096: * <td><tt>SummaryOnly</tt></td>
097: * <td>DEPRECATED. Equivalent to
098: * <tt>ReplaceEmptySummaryWith: nothing</tt></td>
099: * </td>
100: * </tr>
101: * <tr valign="top">
102: * <td><tt>[Feed<i>xxx</i>]</tt></td>
103: * <td><tt>ReplaceEmptySummaryWith</tt></td>
104: * <td>Overrides the global default setting for a given feed.</td>
105: * </tr>
106: * </table>
107: *
108: * @version <tt>$Revision: 7041 $</tt>
109: */
110: public class EmptyArticleSummaryPlugIn implements MainConfigItemPlugIn,
111: FeedConfigItemPlugIn, PostFeedParsePlugIn {
112: /*----------------------------------------------------------------------*\
113: Private Constants
114: \*----------------------------------------------------------------------*/
115:
116: public static final String VAR_SUMMARY_ONLY = "SummaryOnly";
117: public static final String VAR_REPLACE_EMPTY_SUMMARY = "ReplaceEmptySummaryWith";
118:
119: private static Map<String, ReplacementType> LEGAL_VALUES_MAP = new HashMap<String, ReplacementType>();
120: static {
121: for (ReplacementType r : ReplacementType.values())
122: LEGAL_VALUES_MAP.put(r.toString().toLowerCase(), r);
123: }
124:
125: /*----------------------------------------------------------------------*\
126: Inner Classes
127: \*----------------------------------------------------------------------*/
128:
129: private static enum ReplacementType {
130: NOTHING, CONTENT, TITLE
131: };
132:
133: /*----------------------------------------------------------------------*\
134: Private Data Items
135: \*----------------------------------------------------------------------*/
136:
137: /**
138: * Feed save data, by feed
139: */
140: private Map<FeedInfo, ReplacementType> perFeedSetting = new HashMap<FeedInfo, ReplacementType>();
141:
142: /**
143: * The global default
144: */
145: private ReplacementType globalDefault = ReplacementType.CONTENT;
146:
147: /**
148: * For log messages
149: */
150: private static final Logger log = new Logger(
151: EmptyArticleSummaryPlugIn.class);
152:
153: /*----------------------------------------------------------------------*\
154: Constructor
155: \*----------------------------------------------------------------------*/
156:
157: /**
158: * Default constructor (required).
159: */
160: public EmptyArticleSummaryPlugIn() {
161: // Nothing to do
162: }
163:
164: /*----------------------------------------------------------------------*\
165: Public Methods Required by *PlugIn Interfaces
166: \*----------------------------------------------------------------------*/
167:
168: /**
169: * Get a displayable name for the plug-in.
170: *
171: * @return the name
172: */
173: public String getPlugInName() {
174: return "Empty Article Summary";
175: }
176:
177: /**
178: * Get the sort key for this plug-in.
179: *
180: * @return the sort key string.
181: */
182: public String getPlugInSortKey() {
183: return ClassUtil.getShortClassName(getClass().getName());
184: }
185:
186: /**
187: * Initialize the plug-in. This method is called before any of the
188: * plug-in methods are called.
189: *
190: * @throws CurnException on error
191: */
192: public void initPlugIn() throws CurnException {
193: }
194:
195: /**
196: * Called immediately after <i>curn</i> has read and processed a
197: * configuration item in the main [curn] configuration section. All
198: * configuration items are passed, one by one, to each loaded plug-in.
199: * If a plug-in class is not interested in a particular configuration
200: * item, this method should simply return without doing anything. Note
201: * that some configuration items may simply be variable assignment;
202: * there's no real way to distinguish a variable assignment from a
203: * blessed configuration item.
204: *
205: * @param sectionName the name of the configuration section where
206: * the item was found
207: * @param paramName the name of the parameter
208: * @param config the {@link CurnConfig} object
209: *
210: * @throws CurnException on error
211: *
212: * @see CurnConfig
213: */
214: public void runMainConfigItemPlugIn(String sectionName,
215: String paramName, CurnConfig config) throws CurnException {
216: try {
217: if (paramName.equals(VAR_SUMMARY_ONLY)) {
218: String msg = config.getDeprecatedParamMessage(
219: paramName, VAR_REPLACE_EMPTY_SUMMARY);
220: CurnUtil.getErrorOut().println(msg);
221: log.warn(msg);
222: boolean on = config.getRequiredBooleanValue(
223: sectionName, paramName);
224: if (on)
225: globalDefault = ReplacementType.NOTHING;
226: }
227:
228: else if (paramName.equals(VAR_REPLACE_EMPTY_SUMMARY)) {
229: String value = config.getConfigurationValue(
230: sectionName, paramName);
231: ReplacementType type = LEGAL_VALUES_MAP.get(value);
232: if (type == null) {
233: throw new CurnException("Bad value \"" + value
234: + "\" for \"" + paramName
235: + " parameter in [" + sectionName
236: + "] section.");
237: }
238:
239: globalDefault = type;
240: log.debug("[" + sectionName + "] " + paramName + "="
241: + type);
242: }
243: }
244:
245: catch (ConfigurationException ex) {
246: throw new CurnException(ex);
247: }
248: }
249:
250: /**
251: * Called immediately after <i>curn</i> has read and processed a
252: * configuration item in a "feed" configuration section. All
253: * configuration items are passed, one by one, to each loaded plug-in.
254: * If a plug-in class is not interested in a particular configuration
255: * item, this method should simply return without doing anything. Note
256: * that some configuration items may simply be variable assignment;
257: * there's no real way to distinguish a variable assignment from a
258: * blessed configuration item.
259: *
260: * @param sectionName the name of the configuration section where
261: * the item was found
262: * @param paramName the name of the parameter
263: * @param config the active configuration
264: * @param feedInfo partially complete <tt>FeedInfo</tt> object
265: * for the feed. The URL is guaranteed to be
266: * present, but no other fields are.
267: *
268: * @return <tt>true</tt> to continue processing the feed,
269: * <tt>false</tt> to skip it
270: *
271: * @throws CurnException on error
272: *
273: * @see CurnConfig
274: * @see FeedInfo
275: * @see FeedInfo#getURL
276: */
277: public boolean runFeedConfigItemPlugIn(String sectionName,
278: String paramName, CurnConfig config, FeedInfo feedInfo)
279: throws CurnException {
280: try {
281: if (paramName.equals(VAR_SUMMARY_ONLY)) {
282: String msg = config.getDeprecatedParamMessage(
283: paramName, VAR_REPLACE_EMPTY_SUMMARY);
284: CurnUtil.getErrorOut().println(msg);
285: log.warn(msg);
286: boolean on = config.getRequiredBooleanValue(
287: sectionName, paramName);
288: if (on)
289: perFeedSetting.put(feedInfo,
290: ReplacementType.NOTHING);
291: }
292:
293: else if (paramName.equals(VAR_REPLACE_EMPTY_SUMMARY)) {
294: String value = config.getConfigurationValue(
295: sectionName, paramName);
296: ReplacementType type = LEGAL_VALUES_MAP.get(value);
297: if (type == null) {
298: throw new CurnException("Bad value \"" + value
299: + "\" for \"" + paramName
300: + " parameter in [" + sectionName
301: + "] section.");
302: }
303:
304: perFeedSetting.put(feedInfo, type);
305: log.debug("[" + sectionName + "] " + paramName + "="
306: + type);
307: }
308: }
309:
310: catch (ConfigurationException ex) {
311: throw new CurnException(ex);
312: }
313:
314: return true;
315: }
316:
317: /**
318: * Called immediately after a feed is parsed, but before it is
319: * otherwise processed. This method can return <tt>false</tt> to signal
320: * <i>curn</i> that the feed should be skipped. For instance, a plug-in
321: * that filters on the parsed feed data could use this method to weed
322: * out non-matching feeds before they are downloaded. Similarly, a
323: * plug-in that edits the parsed data (removing or editing individual
324: * items, for instance) could use method to do so.
325: *
326: * @param feedInfo the {@link FeedInfo} object for the feed that
327: * has been downloaded and parsed.
328: * @param feedCache the feed cache
329: * @param channel the parsed channel data
330: *
331: * @return <tt>true</tt> if <i>curn</i> should continue to process the
332: * feed, <tt>false</tt> to skip the feed. A return value of
333: * <tt>false</tt> aborts all further processing on the feed.
334: * In particular, <i>curn</i> will not pass the feed along to
335: * other plug-ins that have yet to be notified of this event.
336: *
337: * @throws CurnException on error
338: *
339: * @see RSSChannel
340: * @see FeedInfo
341: */
342: public boolean runPostFeedParsePlugIn(FeedInfo feedInfo,
343: FeedCache feedCache, RSSChannel channel)
344: throws CurnException {
345: ReplacementType type = perFeedSetting.get(feedInfo);
346: if (type == null)
347: type = globalDefault;
348:
349: switch (type) {
350: case NOTHING:
351: break;
352:
353: case CONTENT:
354: log.debug("Replacing empty summaries with content in "
355: + "feed \"" + feedInfo.getURL().toString() + "\"");
356: String s;
357: for (RSSItem item : channel.getItems()) {
358: if (item.getSummary() == null) {
359: s = item.getFirstContentOfType("text/html",
360: "text/plain");
361: if (s != null)
362: item.setSummary(s);
363: }
364: }
365: break;
366:
367: case TITLE:
368: log.debug("Replacing empty summaries with title in "
369: + "feed \"" + feedInfo.getURL().toString() + "\"");
370: for (RSSItem item : channel.getItems()) {
371: if (item.getSummary() == null)
372: item.setSummary(item.getTitle());
373: }
374: break;
375:
376: default:
377: assert (false);
378: }
379:
380: return true;
381: }
382: }
|