001: /*---------------------------------------------------------------------------*\
002: $Id: RawFeedEditPlugIn.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.plugins;
047:
048: import org.clapper.curn.CurnConfig;
049: import org.clapper.curn.CurnException;
050: import org.clapper.curn.FeedInfo;
051: import org.clapper.curn.FeedConfigItemPlugIn;
052: import org.clapper.curn.PostFeedDownloadPlugIn;
053:
054: import org.clapper.util.classutil.ClassUtil;
055: import org.clapper.util.config.ConfigurationException;
056: import org.clapper.util.logging.Logger;
057: import java.io.File;
058: import java.io.IOException;
059:
060: import java.util.ArrayList;
061: import java.util.HashMap;
062: import java.util.List;
063: import java.util.Map;
064: import org.clapper.util.io.FileUtil;
065:
066: /**
067: * The <tt>RawFeedEditPlugIn</tt> edits the raw downloaded XML before it's
068: * parsed. It can be used to fix known errors in the XML. It intercepts the
069: * following per-feed configuration parameters:
070: *
071: * <table border="1">
072: * <tr valign="top">
073: * <td><tt>PreparseEdit<i>suffix</i></tt></td>
074: * <td>Specifies a regular expression substitution to be applied to the
075: * XML. Multiple expressions may be specified per feed. See the User's
076: * Guide for details.
077: * </td>
078: * </tr>
079: * </table>
080: *
081: * @version <tt>$Revision: 7041 $</tt>
082: */
083: public class RawFeedEditPlugIn extends AbstractXMLEditPlugIn implements
084: FeedConfigItemPlugIn, PostFeedDownloadPlugIn {
085: /*----------------------------------------------------------------------*\
086: Private Constants
087: \*----------------------------------------------------------------------*/
088:
089: private static final String VAR_PREPARSE_EDIT = "PreparseEdit";
090: private static final String VAR_SAVE_EDITED_XML_AS = "SaveEditedXMLAs";
091:
092: /*----------------------------------------------------------------------*\
093: Private Classes
094: \*----------------------------------------------------------------------*/
095:
096: /**
097: * Feed edit info
098: */
099: class FeedEditInfo {
100: List<String> editCommands = new ArrayList<String>();
101: File saveAs = null;
102:
103: FeedEditInfo() {
104: // Nothing to do
105: }
106: }
107:
108: /*----------------------------------------------------------------------*\
109: Private Data Items
110: \*----------------------------------------------------------------------*/
111:
112: /**
113: * Feed save data, by feed
114: */
115: private Map<FeedInfo, FeedEditInfo> perFeedEditInfoMap = new HashMap<FeedInfo, FeedEditInfo>();
116:
117: /**
118: * For log messages
119: */
120: private static final Logger log = new Logger(
121: RawFeedEditPlugIn.class);
122:
123: /*----------------------------------------------------------------------*\
124: Constructor
125: \*----------------------------------------------------------------------*/
126:
127: /**
128: * Default constructor (required).
129: */
130: public RawFeedEditPlugIn() {
131: // Nothing to do
132: }
133:
134: /*----------------------------------------------------------------------*\
135: Public Methods Required by *PlugIn Interfaces
136: \*----------------------------------------------------------------------*/
137:
138: /**
139: * Get a displayable name for the plug-in.
140: *
141: * @return the name
142: */
143: public String getPlugInName() {
144: return "Raw Feed Edit";
145: }
146:
147: /**
148: * Get the sort key for this plug-in.
149: *
150: * @return the sort key string.
151: */
152: public String getPlugInSortKey() {
153: return ClassUtil.getShortClassName(getClass().getName());
154: }
155:
156: /**
157: * Initialize the plug-in. This method is called before any of the
158: * plug-in methods are called.
159: *
160: * @throws CurnException on error
161: */
162: public void initPlugIn() throws CurnException {
163: }
164:
165: /**
166: * Called immediately after <i>curn</i> has read and processed a
167: * configuration item in a "feed" configuration section. All
168: * configuration items are passed, one by one, to each loaded plug-in.
169: * If a plug-in class is not interested in a particular configuration
170: * item, this method should simply return without doing anything. Note
171: * that some configuration items may simply be variable assignment;
172: * there's no real way to distinguish a variable assignment from a
173: * blessed configuration item.
174: *
175: * @param sectionName the name of the configuration section where
176: * the item was found
177: * @param paramName the name of the parameter
178: * @param config the active configuration
179: * @param feedInfo partially complete <tt>FeedInfo</tt> object
180: * for the feed. The URL is guaranteed to be
181: * present, but no other fields are.
182: *
183: * @return <tt>true</tt> to continue processing the feed,
184: * <tt>false</tt> to skip it
185: *
186: * @throws CurnException on error
187: *
188: * @see CurnConfig
189: * @see FeedInfo
190: * @see FeedInfo#getURL
191: */
192: public boolean runFeedConfigItemPlugIn(String sectionName,
193: String paramName, CurnConfig config, FeedInfo feedInfo)
194: throws CurnException {
195: try {
196: if (paramName.startsWith(VAR_PREPARSE_EDIT)) {
197: FeedEditInfo editInfo = getOrMakeFeedEditInfo(feedInfo);
198: String value = config.getConfigurationValue(
199: sectionName, paramName);
200: editInfo.editCommands.add(value);
201: log.debug("[" + sectionName + "]: added regexp "
202: + value);
203: }
204:
205: else if (paramName.equals(VAR_SAVE_EDITED_XML_AS)) {
206: FeedEditInfo editInfo = getOrMakeFeedEditInfo(feedInfo);
207: String value = config.getConfigurationValue(
208: sectionName, paramName);
209: editInfo.saveAs = new File(value);
210: log.debug("[" + sectionName
211: + "]: will save edited XML " + "as \""
212: + editInfo.saveAs.getPath() + "\"");
213: }
214:
215: return true;
216: }
217:
218: catch (ConfigurationException ex) {
219: throw new CurnException(ex);
220: }
221: }
222:
223: /**
224: * Called immediately after a feed is downloaded. This method can
225: * return <tt>false</tt> to signal <i>curn</i> that the feed should be
226: * skipped. For instance, a plug-in that filters on the unparsed XML
227: * feed content could use this method to weed out non-matching feeds
228: * before they are downloaded.
229: *
230: * @param feedInfo the {@link FeedInfo} object for the feed that
231: * has been downloaded
232: * @param feedDataFile the file containing the downloaded, unparsed feed
233: * XML. <b><i>curn</i> may delete this file after all
234: * plug-ins are notified!</b>
235: * @param encoding the encoding used to store the data in the file,
236: * or null for the default
237: *
238: * @return <tt>true</tt> if <i>curn</i> should continue to process the
239: * feed, <tt>false</tt> to skip the feed. A return value of
240: * <tt>false</tt> aborts all further processing on the feed.
241: * In particular, <i>curn</i> will not pass the feed along to
242: * other plug-ins that have yet to be notified of this event.
243: *
244: * @throws CurnException on error
245: *
246: * @see FeedInfo
247: */
248: public boolean runPostFeedDownloadPlugIn(FeedInfo feedInfo,
249: File feedDataFile, String encoding) throws CurnException {
250: FeedEditInfo editInfo = perFeedEditInfoMap.get(feedInfo);
251:
252: if ((editInfo != null) && (editInfo.editCommands.size() > 0)) {
253: editXML(feedInfo, feedDataFile, encoding,
254: editInfo.editCommands);
255: if (editInfo.saveAs != null) {
256: try {
257: FileUtil.copyTextFile(feedDataFile, encoding,
258: editInfo.saveAs, encoding);
259: }
260:
261: catch (IOException ex) {
262: throw new CurnException(ex);
263: }
264: }
265: }
266:
267: return true;
268: }
269:
270: /*----------------------------------------------------------------------*\
271: Protected Methods
272: \*----------------------------------------------------------------------*/
273:
274: protected Logger getLogger() {
275: return log;
276: }
277:
278: /*----------------------------------------------------------------------*\
279: Private Methods
280: \*----------------------------------------------------------------------*/
281:
282: private FeedEditInfo getOrMakeFeedEditInfo(FeedInfo feedInfo) {
283: FeedEditInfo editInfo = perFeedEditInfoMap.get(feedInfo);
284: if (editInfo == null) {
285: editInfo = new FeedEditInfo();
286: perFeedEditInfoMap.put(feedInfo, editInfo);
287: }
288:
289: return editInfo;
290: }
291: }
|