001: /*---------------------------------------------------------------------------*\
002: $Id: SaveAsRSSPlugIn.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.plugins;
047:
048: import org.clapper.curn.Constants;
049: import org.clapper.curn.CurnConfig;
050: import org.clapper.curn.CurnException;
051: import org.clapper.curn.FeedInfo;
052: import org.clapper.curn.FeedConfigItemPlugIn;
053: import org.clapper.curn.PostConfigPlugIn;
054:
055: import org.clapper.util.classutil.ClassUtil;
056: import org.clapper.util.config.ConfigurationException;
057: import org.clapper.util.logging.Logger;
058:
059: import java.io.File;
060: import java.io.FileOutputStream;
061: import java.io.IOException;
062: import java.io.OutputStreamWriter;
063: import java.io.Writer;
064:
065: import java.util.Map;
066: import java.util.HashMap;
067: import java.util.Iterator;
068: import java.util.NoSuchElementException;
069: import org.clapper.curn.CurnUtil;
070: import org.clapper.curn.FeedCache;
071: import org.clapper.curn.PostFeedParsePlugIn;
072: import org.clapper.curn.output.freemarker.FreeMarkerFeedTransformer;
073: import org.clapper.curn.output.freemarker.TemplateLocation;
074: import org.clapper.curn.output.freemarker.TemplateType;
075: import org.clapper.curn.parser.RSSChannel;
076: import org.clapper.util.cmdline.CommandLineUsageException;
077: import org.clapper.util.cmdline.ParameterHandler;
078: import org.clapper.util.cmdline.ParameterParser;
079: import org.clapper.util.cmdline.UsageInfo;
080: import org.clapper.util.io.IOExceptionExt;
081: import org.clapper.util.text.TextUtil;
082:
083: /**
084: * <p>The <tt>SaveAsRSSPlugIn</tt> acts sort of like a single-feed output
085: * handler: It takes a feed that's been parsed, converts the parsed data to RSS
086: * or Atom format, and writes it to a file. It differs from an output handler in
087: * that an output handler must handle multiple feeds, whereas this plug-in
088: * handles a single feed at a time.</p>
089: *
090: * <p>This plug-in intercepts the following per-feed configuration
091: * parameters:</p>
092: *
093: * <table width="80%" class="nested-table" align="center">
094: * <tr valign="top">
095: * <td><tt>SaveAsRSS [options] path</tt></td>
096: * <td><i>path</i> is the path to the file to receive the RSS output.
097: *
098: * Options:
099: * <ul>
100: * <li><tt>-t <i>type</i></tt> (or <tt>--type <i>type</i></tt>)
101: * is the type of RSS output to generate. Currently, the legal
102: * values for the <i>type</i> argument are: "rss1", "rss2", "atom".
103: * If not specified, this option defaults to "atom".
104: * <li><tt>-b <i>backups</i></tt> (or <tt>--backups <i>backups</i></tt>)
105: * specifies how many backups of <i>path</i> to retain. Default: 0
106: * <li><tt>-e <i>encoding</i></tt> (or <tt>--encoding <i>encoding</i></tt>
107: * is encoding to use for the file. It defaults to "utf-8".
108: * </ul>
109: * Examples:
110: * <pre>
111: * SaveAsRSS: -t rss1 -b 3 -e iso-8859-1 ${user.home:.curn}/rss/foo.xml
112: * SaveAsRSS: --type atom --encoding utf16 C:/temp/foo.xml</pre>
113: * </td>
114: * </tr>
115: * <tr>
116: * <td><tt>SaveRSSOnly</tt></td>
117: * <td>If set to "true", this parameter indicates that the RSS file should
118: * generated, but that all further processing on the feed should be
119: * skip. In particular, the feed won't be passed to any other plug-ins,
120: * and it won't be passed to any output handlers. This parameter cannot
121: * be specified unless <tt>SaveAsRSS</tt> is also specified.</td>
122: * </tr>
123: * </table>
124: *
125: * <p>Note: If this plug-in is used in conjunction with the
126: * {@link RawFeedSaveAsPlugIn} class, and the {@link RawFeedSaveAsPlugIn}
127: * class's <tt>SaveOnly</tt> parameter is specified, this plug-in will
128: * <i>not</i> be invoked.</p>
129: *
130: * @version <tt>$Revision: 7041 $</tt>
131: */
132: public class SaveAsRSSPlugIn implements FeedConfigItemPlugIn,
133: PostConfigPlugIn, PostFeedParsePlugIn {
134: /*----------------------------------------------------------------------*\
135: Private Constants
136: \*----------------------------------------------------------------------*/
137:
138: private static final String VAR_SAVE_AS_RSS = "SaveAsRSS";
139: private static final String VAR_SAVE_RSS_ONLY = "SaveRSSOnly";
140: private static final String RSS1_TEMPLATE_PATH = "org/clapper/curn/output/freemarker/RSS1.ftl";
141: private static final String RSS2_TEMPLATE_PATH = "org/clapper/curn/output/freemarker/RSS2.ftl";
142: private static final String ATOM_TEMPLATE_PATH = "org/clapper/curn/output/freemarker/Atom.ftl";
143:
144: /*----------------------------------------------------------------------*\
145: Private Classes
146: \*----------------------------------------------------------------------*/
147:
148: /**
149: * Feed save info
150: */
151: class FeedSaveInfo {
152: String sectionName;
153: File saveAsFile;
154: boolean saveOnly;
155: String saveAsEncoding = "utf-8";
156: TemplateLocation templateLocation = null;
157: int backups = 0;
158:
159: FeedSaveInfo() {
160: // Nothing to do
161: }
162: }
163:
164: /*----------------------------------------------------------------------*\
165: Private Data Items
166: \*----------------------------------------------------------------------*/
167:
168: /**
169: * Feed save data, by feed
170: */
171: private Map<FeedInfo, FeedSaveInfo> perFeedSaveAsMap = new HashMap<FeedInfo, FeedSaveInfo>();
172:
173: /**
174: * Saved reference to the configuration
175: */
176: private CurnConfig config = null;
177:
178: /**
179: * For log messages
180: */
181: private static final Logger log = new Logger(SaveAsRSSPlugIn.class);
182:
183: /*----------------------------------------------------------------------*\
184: Constructor
185: \*----------------------------------------------------------------------*/
186:
187: /**
188: * Default constructor (required).
189: */
190: public SaveAsRSSPlugIn() {
191: // Nothing to do
192: }
193:
194: /*----------------------------------------------------------------------*\
195: Public Methods Required by *PlugIn Interfaces
196: \*----------------------------------------------------------------------*/
197:
198: /**
199: * Get a displayable name for the plug-in.
200: *
201: * @return the name
202: */
203: public String getPlugInName() {
204: return "Save As RSS";
205: }
206:
207: /**
208: * Get the sort key for this plug-in.
209: *
210: * @return the sort key string.
211: */
212: public String getPlugInSortKey() {
213: return ClassUtil.getShortClassName(getClass().getName());
214: }
215:
216: /**
217: * Initialize the plug-in. This method is called before any of the
218: * plug-in methods are called.
219: *
220: * @throws CurnException on error
221: */
222: public void initPlugIn() throws CurnException {
223: }
224:
225: /**
226: * Called immediately after <i>curn</i> has read and processed a
227: * configuration item in a "feed" configuration section. All
228: * configuration items are passed, one by one, to each loaded plug-in.
229: * If a plug-in class is not interested in a particular configuration
230: * item, this method should simply return without doing anything. Note
231: * that some configuration items may simply be variable assignment;
232: * there's no real way to distinguish a variable assignment from a
233: * blessed configuration item.
234: *
235: * @param sectionName the name of the configuration section where
236: * the item was found
237: * @param paramName the name of the parameter
238: * @param config the active configuration
239: * @param feedInfo partially complete <tt>FeedInfo</tt> object
240: * for the feed. The URL is guaranteed to be
241: * present, but no other fields are.
242: *
243: * @return <tt>true</tt> to continue processing the feed,
244: * <tt>false</tt> to skip it
245: *
246: * @throws CurnException on error
247: *
248: * @see CurnConfig
249: * @see FeedInfo
250: * @see FeedInfo#getURL
251: */
252: public boolean runFeedConfigItemPlugIn(String sectionName,
253: String paramName, CurnConfig config, FeedInfo feedInfo)
254: throws CurnException {
255: try {
256: if (paramName.equals(VAR_SAVE_AS_RSS)) {
257: handleSaveAsConfigParam(sectionName, paramName, config,
258: feedInfo);
259: }
260:
261: else if (paramName.equals(VAR_SAVE_RSS_ONLY)) {
262: FeedSaveInfo saveInfo = getOrMakeFeedSaveInfo(feedInfo);
263: saveInfo.saveOnly = config.getOptionalBooleanValue(
264: sectionName, paramName, false);
265: saveInfo.sectionName = sectionName;
266: log.debug("[" + sectionName + "]: SaveRSSOnly="
267: + saveInfo.saveOnly);
268: }
269:
270: return true;
271: }
272:
273: catch (ConfigurationException ex) {
274: throw new CurnException(ex);
275: }
276: }
277:
278: /**
279: * Called after the entire configuration has been read and parsed, but
280: * before any feeds are processed. Intercepting this event is useful
281: * for plug-ins that want to adjust the configuration. For instance,
282: * the <i>curn</i> command-line wrapper intercepts this plug-in event
283: * so it can adjust the configuration to account for command line
284: * options.
285: *
286: * @param config the parsed {@link CurnConfig} object
287: *
288: * @throws CurnException on error
289: *
290: * @see CurnConfig
291: */
292: public void runPostConfigPlugIn(CurnConfig config)
293: throws CurnException {
294: this .config = config;
295:
296: for (FeedInfo feedInfo : perFeedSaveAsMap.keySet()) {
297: FeedSaveInfo saveInfo = perFeedSaveAsMap.get(feedInfo);
298:
299: if (saveInfo.saveOnly && (saveInfo.saveAsFile == null)) {
300: throw new CurnException(
301: Constants.BUNDLE_NAME,
302: "CurnConfig.saveOnlyButNoSaveAs",
303: "Configuration section \"{0}\": "
304: + "\"[1}\" may only be specified if \"{2}\" is set.",
305: new Object[] { saveInfo.sectionName,
306: VAR_SAVE_RSS_ONLY, VAR_SAVE_AS_RSS });
307: }
308: }
309: }
310:
311: /**
312: * <p>Called just after the feed has been parsed, but before it is
313: * otherwise processed.
314: *
315: * @param feedInfo the {@link FeedInfo} object for the feed
316: * @param feedCache the feed cache
317: * @param channel the parsed feed data
318: *
319: * @return <tt>true</tt> if <i>curn</i> should continue to process the
320: * feed, <tt>false</tt> to skip the feed
321: *
322: * @throws CurnException on error
323: *
324: * @see FeedInfo
325: * @see RSSChannel
326: */
327: public boolean runPostFeedParsePlugIn(final FeedInfo feedInfo,
328: final FeedCache feedCache, final RSSChannel channel)
329: throws CurnException {
330: boolean keepGoing = true;
331: FeedSaveInfo saveInfo = perFeedSaveAsMap.get(feedInfo);
332:
333: if ((saveInfo != null) && (saveInfo.saveAsFile != null)) {
334: // Create a feed transformer and set the invariant stuff.
335:
336: FreeMarkerFeedTransformer feedTransformer = new FreeMarkerFeedTransformer(
337: config, true);
338: feedTransformer.setEncoding(saveInfo.saveAsEncoding);
339: feedTransformer.setTemplate(saveInfo.templateLocation,
340: "text/xml");
341:
342: // Now, add the channel.
343:
344: feedTransformer.addChannel(channel, feedInfo, true);
345:
346: // Now, transform the feed.
347:
348: try {
349: log.debug("Generating RSS output file \""
350: + saveInfo.saveAsFile + "\" (encoding "
351: + saveInfo.saveAsEncoding + ")");
352:
353: Writer out = CurnUtil.openOutputFile(
354: saveInfo.saveAsFile, saveInfo.saveAsEncoding,
355: CurnUtil.IndexMarker.BEFORE_EXTENSION,
356: saveInfo.backups);
357:
358: new OutputStreamWriter(new FileOutputStream(
359: saveInfo.saveAsFile), saveInfo.saveAsEncoding);
360: feedTransformer.transform(out);
361: out.close();
362: }
363:
364: catch (IOExceptionExt ex) {
365: throw new CurnException("Can't write RSS output to \""
366: + saveInfo.saveAsFile + "\": ", ex);
367: }
368:
369: catch (IOException ex) {
370: throw new CurnException("Can't write RSS output to \""
371: + saveInfo.saveAsFile + "\": ", ex);
372: }
373:
374: keepGoing = !saveInfo.saveOnly;
375: }
376:
377: return keepGoing;
378: }
379:
380: /*----------------------------------------------------------------------*\
381: Private Methods
382: \*----------------------------------------------------------------------*/
383:
384: private FeedSaveInfo getOrMakeFeedSaveInfo(FeedInfo feedInfo) {
385: FeedSaveInfo saveInfo = perFeedSaveAsMap.get(feedInfo);
386: if (saveInfo == null) {
387: saveInfo = new FeedSaveInfo();
388: perFeedSaveAsMap.put(feedInfo, saveInfo);
389: }
390:
391: return saveInfo;
392: }
393:
394: private void handleSaveAsConfigParam(final String sectionName,
395: final String paramName, final CurnConfig config,
396: final FeedInfo feedInfo) throws CurnException,
397: ConfigurationException {
398: final FeedSaveInfo saveInfo = getOrMakeFeedSaveInfo(feedInfo);
399:
400: // Parse the value as a command line.
401:
402: UsageInfo usageInfo = new UsageInfo();
403: usageInfo.addOption('b', "backups", "<n>",
404: "Number of backups to keep");
405: usageInfo.addOption('t', "type", "<rss1|rss2|atom>",
406: "RSS type for output.");
407: usageInfo.addOption('e', "encoding", "<encoding>",
408: "Desired output encoding");
409: usageInfo.addParameter("<path>", "Path to RSS output file",
410: true);
411:
412: // Inner class for handling command-line syntax of the value.
413:
414: class ConfigParameterHandler implements ParameterHandler {
415: String templatePath = ATOM_TEMPLATE_PATH;
416: private String rawValue;
417:
418: ConfigParameterHandler(String rawValue) {
419: this .rawValue = rawValue;
420: }
421:
422: public void parseOption(char shortOption,
423: String longOption, Iterator<String> it)
424: throws CommandLineUsageException,
425: NoSuchElementException {
426: String value;
427: switch (shortOption) {
428: case 'b':
429: value = it.next();
430: try {
431: saveInfo.backups = Integer.parseInt(value);
432: }
433:
434: catch (NumberFormatException ex) {
435: throw new CommandLineUsageException("Section ["
436: + sectionName + "], parameter \""
437: + paramName + "\": "
438: + "Unexpected non-numeric value \""
439: + value + "\" for \""
440: + UsageInfo.SHORT_OPTION_PREFIX
441: + shortOption + "\" option.");
442: }
443: break;
444:
445: case 't':
446: value = it.next();
447: if (value.equalsIgnoreCase("rss1"))
448: templatePath = RSS1_TEMPLATE_PATH;
449: else if (value.equalsIgnoreCase("rss2"))
450: templatePath = RSS2_TEMPLATE_PATH;
451: else if (value.equalsIgnoreCase("atom"))
452: templatePath = ATOM_TEMPLATE_PATH;
453: else {
454: throw new CommandLineUsageException(
455: "Section \"" + sectionName
456: + "\": Parameter \""
457: + paramName
458: + "\" has unknown RSS type \""
459: + value + "\"");
460: }
461:
462: break;
463:
464: case 'e':
465: saveInfo.saveAsEncoding = it.next();
466: break;
467:
468: default:
469: throw new CommandLineUsageException("Section ["
470: + sectionName + "], parameter \""
471: + paramName + "\": " + "Unknown option \""
472: + UsageInfo.SHORT_OPTION_PREFIX
473: + shortOption + "\" in value \"" + rawValue
474: + "\"");
475: }
476: }
477:
478: public void parsePostOptionParameters(Iterator<String> it)
479: throws CommandLineUsageException,
480: NoSuchElementException {
481: saveInfo.saveAsFile = CurnUtil.mapConfiguredPathName(it
482: .next());
483: }
484: }
485: ;
486:
487: // Parse the parameters.
488:
489: ParameterParser paramParser = new ParameterParser(usageInfo);
490: String rawValue = config.getConfigurationValue(sectionName,
491: paramName);
492: try {
493: String[] valueTokens = config.getConfigurationTokens(
494: sectionName, paramName);
495: if (log.isDebugEnabled()) {
496: log.debug("[" + sectionName + "]: SaveAsRSS: value=\""
497: + rawValue + "\", tokens="
498: + TextUtil.join(valueTokens, '|'));
499: }
500:
501: ConfigParameterHandler handler = new ConfigParameterHandler(
502: rawValue);
503: log.debug("Parsing value \"" + rawValue + "\"");
504: paramParser.parse(valueTokens, handler);
505:
506: // Save values the parser could not.
507:
508: saveInfo.templateLocation = new TemplateLocation(
509: TemplateType.CLASSPATH, handler.templatePath);
510: }
511:
512: catch (CommandLineUsageException ex) {
513: throw new CurnException("Section [" + sectionName
514: + "], parameter \"" + paramName
515: + "\": Error parsing value \"" + rawValue + "\"",
516: ex);
517: }
518: }
519: }
|