001: /*---------------------------------------------------------------------------*\
002: $Id: RawFeedSaveAsPlugIn.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.plugins;
047:
048: import org.clapper.curn.Constants;
049: import org.clapper.curn.CurnConfig;
050: import org.clapper.curn.CurnException;
051: import org.clapper.curn.FeedInfo;
052: import org.clapper.curn.FeedConfigItemPlugIn;
053: import org.clapper.curn.PostConfigPlugIn;
054: import org.clapper.curn.PreFeedDownloadPlugIn;
055: import org.clapper.curn.PostFeedDownloadPlugIn;
056:
057: import org.clapper.util.classutil.ClassUtil;
058: import org.clapper.util.config.ConfigurationException;
059: import org.clapper.util.io.FileUtil;
060: import org.clapper.util.logging.Logger;
061:
062: import java.io.File;
063: import java.io.FileInputStream;
064: import java.io.FileReader;
065: import java.io.IOException;
066: import java.io.InputStreamReader;
067: import java.io.Reader;
068: import java.io.Writer;
069:
070: import java.net.URLConnection;
071:
072: import java.util.Map;
073: import java.util.HashMap;
074: import java.util.Iterator;
075: import java.util.NoSuchElementException;
076: import org.clapper.curn.CurnUtil;
077: import org.clapper.util.cmdline.CommandLineUsageException;
078: import org.clapper.util.cmdline.ParameterHandler;
079: import org.clapper.util.cmdline.ParameterParser;
080: import org.clapper.util.cmdline.UsageInfo;
081: import org.clapper.util.io.IOExceptionExt;
082: import org.clapper.util.text.TextUtil;
083:
084: /**
085: * The <tt>RawFeedSaveAsPlugIn</tt> handles saving a feed to a known location.
086: * It intercepts the following per-feed configuration parameters:
087: *
088: * <table border="1">
089: * <tr valign="top">
090: * <td><tt>SaveAs</tt></td>
091: * <td>Path to file where raw XML should be saved.</td>
092: * </tr>
093: * <tr valign="top">
094: * <td><tt>SaveOnly</tt></td>
095: * <td>If set to "true", this parameter indicates that raw XML should be
096: * saved, but not parsed. This parameter can only be specified if
097: * <tt>SaveAs</tt> is also specified.</td>
098: * </tr>
099: * <tr valign="top">
100: * <td><tt>SaveAsEncoding</tt></td>
101: * <td>The character set encoding to use when saving the file. Default:
102: * "utf-8"</td>
103: * </tr>
104: * </table>
105: *
106: * @version <tt>$Revision: 7041 $</tt>
107: */
108: public class RawFeedSaveAsPlugIn implements FeedConfigItemPlugIn,
109: PostConfigPlugIn, PostFeedDownloadPlugIn {
110: /*----------------------------------------------------------------------*\
111: Private Constants
112: \*----------------------------------------------------------------------*/
113:
114: private static final String VAR_SAVE_FEED_AS = "SaveAs";
115: private static final String VAR_SAVE_ONLY = "SaveOnly";
116: private static final String VAR_SAVE_AS_ENCODING = "SaveAsEncoding";
117:
118: /*----------------------------------------------------------------------*\
119: Private Classes
120: \*----------------------------------------------------------------------*/
121:
122: /**
123: * Feed save info
124: */
125: class FeedSaveInfo {
126: String sectionName;
127: File saveAsFile;
128: int backups = 0;
129: boolean saveOnly;
130: String saveAsEncoding = "utf-8";
131:
132: FeedSaveInfo() {
133: // Nothing to do
134: }
135: }
136:
137: /*----------------------------------------------------------------------*\
138: Private Data Items
139: \*----------------------------------------------------------------------*/
140:
141: /**
142: * Feed save data, by feed
143: */
144: private Map<FeedInfo, FeedSaveInfo> perFeedSaveAsMap = new HashMap<FeedInfo, FeedSaveInfo>();
145:
146: /**
147: * Saved reference to the configuration
148: */
149: private CurnConfig config = null;
150:
151: /**
152: * For log messages
153: */
154: private static final Logger log = new Logger(
155: RawFeedSaveAsPlugIn.class);
156:
157: /*----------------------------------------------------------------------*\
158: Constructor
159: \*----------------------------------------------------------------------*/
160:
161: /**
162: * Default constructor (required).
163: */
164: public RawFeedSaveAsPlugIn() {
165: // Nothing to do
166: }
167:
168: /*----------------------------------------------------------------------*\
169: Public Methods Required by *PlugIn Interfaces
170: \*----------------------------------------------------------------------*/
171:
172: /**
173: * Get a displayable name for the plug-in.
174: *
175: * @return the name
176: */
177: public String getPlugInName() {
178: return "Save As";
179: }
180:
181: /**
182: * Get the sort key for this plug-in.
183: *
184: * @return the sort key string.
185: */
186: public String getPlugInSortKey() {
187: return ClassUtil.getShortClassName(getClass().getName());
188: }
189:
190: /**
191: * Initialize the plug-in. This method is called before any of the
192: * plug-in methods are called.
193: *
194: * @throws CurnException on error
195: */
196: public void initPlugIn() throws CurnException {
197: }
198:
199: /**
200: * Called immediately after <i>curn</i> has read and processed a
201: * configuration item in a "feed" configuration section. All
202: * configuration items are passed, one by one, to each loaded plug-in.
203: * If a plug-in class is not interested in a particular configuration
204: * item, this method should simply return without doing anything. Note
205: * that some configuration items may simply be variable assignment;
206: * there's no real way to distinguish a variable assignment from a
207: * blessed configuration item.
208: *
209: * @param sectionName the name of the configuration section where
210: * the item was found
211: * @param paramName the name of the parameter
212: * @param config the active configuration
213: * @param feedInfo partially complete <tt>FeedInfo</tt> object
214: * for the feed. The URL is guaranteed to be
215: * present, but no other fields are.
216: *
217: * @return <tt>true</tt> to continue processing the feed,
218: * <tt>false</tt> to skip it
219: *
220: * @throws CurnException on error
221: *
222: * @see CurnConfig
223: * @see FeedInfo
224: * @see FeedInfo#getURL
225: */
226: public boolean runFeedConfigItemPlugIn(String sectionName,
227: String paramName, CurnConfig config, FeedInfo feedInfo)
228: throws CurnException {
229: try {
230: if (paramName.equals(VAR_SAVE_FEED_AS)) {
231: handleSaveAsConfigParam(sectionName, paramName, config,
232: feedInfo);
233: }
234:
235: else if (paramName.equals(VAR_SAVE_ONLY)) {
236: FeedSaveInfo saveInfo = getOrMakeFeedSaveInfo(feedInfo);
237: saveInfo.saveOnly = config.getOptionalBooleanValue(
238: sectionName, paramName, false);
239: saveInfo.sectionName = sectionName;
240: log.debug("[" + sectionName + "]: SaveOnly="
241: + saveInfo.saveOnly);
242: }
243:
244: else if (paramName.equals(VAR_SAVE_AS_ENCODING)) {
245: String msg = config.getDeprecatedParamMessage(
246: paramName, VAR_SAVE_FEED_AS);
247: CurnUtil.getErrorOut().println(msg);
248: log.warn(msg);
249:
250: FeedSaveInfo saveInfo = getOrMakeFeedSaveInfo(feedInfo);
251: saveInfo.saveAsEncoding = config.getConfigurationValue(
252: sectionName, paramName);
253: saveInfo.sectionName = sectionName;
254: log.debug("[" + sectionName + "]: SaveAsEncoding="
255: + saveInfo.saveAsEncoding);
256: }
257:
258: return true;
259: }
260:
261: catch (ConfigurationException ex) {
262: throw new CurnException(ex);
263: }
264: }
265:
266: /**
267: * Called after the entire configuration has been read and parsed, but
268: * before any feeds are processed. Intercepting this event is useful
269: * for plug-ins that want to adjust the configuration. For instance,
270: * the <i>curn</i> command-line wrapper intercepts this plug-in event
271: * so it can adjust the configuration to account for command line
272: * options.
273: *
274: * @param config the parsed {@link CurnConfig} object
275: *
276: * @throws CurnException on error
277: *
278: * @see CurnConfig
279: */
280: public void runPostConfigPlugIn(CurnConfig config)
281: throws CurnException {
282: this .config = config;
283:
284: for (FeedInfo feedInfo : perFeedSaveAsMap.keySet()) {
285: FeedSaveInfo saveInfo = perFeedSaveAsMap.get(feedInfo);
286:
287: if (saveInfo.saveOnly && (saveInfo.saveAsFile == null)) {
288: throw new CurnException(
289: Constants.BUNDLE_NAME,
290: "CurnConfig.saveOnlyButNoSaveAs",
291: "Configuration section \"{0}\": "
292: + "\"[1}\" may only be specified if \"{2}\" is set.",
293: new Object[] { saveInfo.sectionName,
294: VAR_SAVE_ONLY, VAR_SAVE_FEED_AS });
295: }
296: }
297: }
298:
299: /**
300: * Called immediately after a feed is downloaded. This method can
301: * return <tt>false</tt> to signal <i>curn</i> that the feed should be
302: * skipped. For instance, a plug-in that filters on the unparsed XML
303: * feed content could use this method to weed out non-matching feeds
304: * before they are downloaded.
305: *
306: * @param feedInfo the {@link FeedInfo} object for the feed that
307: * has been downloaded
308: * @param feedDataFile the file containing the downloaded, unparsed feed
309: * XML. <b><i>curn</i> may delete this file after all
310: * plug-ins are notified!</b>
311: * @param encoding the encoding used to store the data in the file,
312: * or null for the default
313: *
314: * @return <tt>true</tt> if <i>curn</i> should continue to process the
315: * feed, <tt>false</tt> to skip the feed. A return value of
316: * <tt>false</tt> aborts all further processing on the feed.
317: * In particular, <i>curn</i> will not pass the feed along to
318: * other plug-ins that have yet to be notified of this event.
319: *
320: * @throws CurnException on error
321: *
322: * @see FeedInfo
323: */
324: public boolean runPostFeedDownloadPlugIn(FeedInfo feedInfo,
325: File feedDataFile, String encoding) throws CurnException {
326: boolean keepGoing = true;
327: FeedSaveInfo saveInfo = perFeedSaveAsMap.get(feedInfo);
328:
329: if ((saveInfo != null) && (saveInfo.saveAsFile != null)) {
330: try {
331: String s = ((encoding == null) ? "default" : encoding);
332: log.debug("Copying temporary file \""
333: + feedDataFile.getPath() + "\" (encoding " + s
334: + ") to \"" + saveInfo.saveAsFile.getPath()
335: + "\" (encoding " + saveInfo.saveAsEncoding
336: + ")");
337:
338: Writer out = CurnUtil.openOutputFile(
339: saveInfo.saveAsFile, saveInfo.saveAsEncoding,
340: CurnUtil.IndexMarker.BEFORE_EXTENSION,
341: saveInfo.backups);
342:
343: Reader in;
344: if (encoding == null) {
345: in = new FileReader(feedDataFile);
346: } else {
347: in = new InputStreamReader(new FileInputStream(
348: feedDataFile), encoding);
349: }
350: FileUtil.copyReader(in, out);
351: out.close();
352: in.close();
353: }
354:
355: catch (IOExceptionExt ex) {
356: throw new CurnException("Can't copy \""
357: + feedDataFile.getPath() + "\" to \""
358: + saveInfo.saveAsFile.getPath() + "\": ", ex);
359: }
360:
361: catch (IOException ex) {
362: throw new CurnException("Can't copy \""
363: + feedDataFile.getPath() + "\" to \""
364: + saveInfo.saveAsFile.getPath() + "\": ", ex);
365: }
366:
367: keepGoing = !saveInfo.saveOnly;
368: }
369:
370: return keepGoing;
371: }
372:
373: /*----------------------------------------------------------------------*\
374: Private Methods
375: \*----------------------------------------------------------------------*/
376:
377: private FeedSaveInfo getOrMakeFeedSaveInfo(FeedInfo feedInfo) {
378: FeedSaveInfo saveInfo = perFeedSaveAsMap.get(feedInfo);
379: if (saveInfo == null) {
380: saveInfo = new FeedSaveInfo();
381: perFeedSaveAsMap.put(feedInfo, saveInfo);
382: }
383:
384: return saveInfo;
385: }
386:
387: private void handleSaveAsConfigParam(final String sectionName,
388: final String paramName, final CurnConfig config,
389: final FeedInfo feedInfo) throws CurnException,
390: ConfigurationException {
391: final FeedSaveInfo saveInfo = getOrMakeFeedSaveInfo(feedInfo);
392:
393: // Parse the value as a command line.
394:
395: UsageInfo usageInfo = new UsageInfo();
396: usageInfo.addOption('b', "backups", "<n>",
397: "Number of backups to keep");
398: usageInfo.addOption('e', "encoding", "<encoding>",
399: "Desired output encoding");
400: usageInfo.addParameter("<path>", "Path to RSS output file",
401: true);
402:
403: // Inner class for handling command-line syntax of the value.
404:
405: class ConfigParameterHandler implements ParameterHandler {
406: private String rawValue;
407:
408: ConfigParameterHandler(String rawValue) {
409: this .rawValue = rawValue;
410: }
411:
412: public void parseOption(char shortOption,
413: String longOption, Iterator<String> it)
414: throws CommandLineUsageException,
415: NoSuchElementException {
416: String value;
417: switch (shortOption) {
418: case 'b':
419: value = it.next();
420: try {
421: saveInfo.backups = Integer.parseInt(value);
422: }
423:
424: catch (NumberFormatException ex) {
425: throw new CommandLineUsageException("Section ["
426: + sectionName + "], parameter \""
427: + paramName + "\": "
428: + "Unexpected non-numeric value \""
429: + value + "\" for \""
430: + UsageInfo.SHORT_OPTION_PREFIX
431: + shortOption + "\" option.");
432: }
433: break;
434:
435: case 'e':
436: saveInfo.saveAsEncoding = it.next();
437: break;
438:
439: default:
440: throw new CommandLineUsageException("Section ["
441: + sectionName + "], parameter \""
442: + paramName + "\": " + "Unknown option \""
443: + UsageInfo.SHORT_OPTION_PREFIX
444: + shortOption + "\" in value \"" + rawValue
445: + "\"");
446: }
447: }
448:
449: public void parsePostOptionParameters(Iterator<String> it)
450: throws CommandLineUsageException,
451: NoSuchElementException {
452: saveInfo.saveAsFile = CurnUtil.mapConfiguredPathName(it
453: .next());
454: }
455: }
456: ;
457:
458: // Parse the parameters.
459:
460: ParameterParser paramParser = new ParameterParser(usageInfo);
461: String rawValue = config.getConfigurationValue(sectionName,
462: paramName);
463: try {
464: String[] valueTokens = config.getConfigurationTokens(
465: sectionName, paramName);
466: if (log.isDebugEnabled()) {
467: log.debug("[" + sectionName + "]: SaveAsRSS: value=\""
468: + rawValue + "\", tokens="
469: + TextUtil.join(valueTokens, '|'));
470: }
471:
472: ConfigParameterHandler handler = new ConfigParameterHandler(
473: rawValue);
474: log.debug("Parsing value \"" + rawValue + "\"");
475: paramParser.parse(valueTokens, handler);
476: log.debug("Section [" + sectionName + "], parameter \""
477: + paramName + "\": backups=" + saveInfo.backups
478: + ", encoding=" + saveInfo.saveAsEncoding
479: + ", path=" + saveInfo.saveAsFile.getPath());
480: }
481:
482: catch (CommandLineUsageException ex) {
483: throw new CurnException("Section [" + sectionName
484: + "], parameter \"" + paramName
485: + "\": Error parsing value \"" + rawValue + "\"",
486: ex);
487: }
488: }
489: }
|