001: /*---------------------------------------------------------------------------*\
002: $Id: AllowEmbeddedHTMLPlugIn.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.plugins;
047:
048: import org.clapper.curn.CurnConfig;
049: import org.clapper.curn.CurnException;
050: import org.clapper.curn.FeedInfo;
051: import org.clapper.curn.MainConfigItemPlugIn;
052: import org.clapper.curn.FeedConfigItemPlugIn;
053: import org.clapper.curn.PostFeedParsePlugIn;
054: import org.clapper.curn.parser.RSSChannel;
055:
056: import org.clapper.util.classutil.ClassUtil;
057: import org.clapper.util.config.ConfigurationException;
058: import org.clapper.util.logging.Logger;
059:
060: import java.util.HashMap;
061: import java.util.Map;
062: import org.clapper.curn.FeedCache;
063:
064: /**
065: * The <tt>AllowEmbeddedHTMLPlugIn</tt> handles stripping (or not
066: * stripping) embedded HTML from parsed RSS data. It intercepts the
067: * following configuration parameters:
068: *
069: * <table border="1">
070: * <tr valign="top" align="left">
071: * <th>Section</th>
072: * <th>Parameter</th>
073: * <th>Meaning</th>
074: * </tr>
075: * <tr valign="top">
076: * <td><tt>[curn]</tt></td>
077: * <td><tt>AllowEmbeddedHTML</tt></td>
078: * <td>The default (global) setting for all feeds. If not specified, the
079: * default value is "false".</td>
080: * </tr>
081: * <tr valign="top">
082: * <td><tt>[Feed<i>xxx</i>]</tt></td>
083: * <td><tt>AllowEmbeddedHTML</tt></td>
084: * <td>The setting for a specific feed. If not specified, the global
085: * default is used.
086: * </td>
087: * </tr>
088: * </table>
089: *
090: * Note that enabling embedded HTML for a feed might not be sufficient,
091: * since embedded HTML can be disabled on a per-output handler basis, as
092: * well.
093: *
094: * @version <tt>$Revision: 7041 $</tt>
095: */
096: public class AllowEmbeddedHTMLPlugIn implements MainConfigItemPlugIn,
097: FeedConfigItemPlugIn, PostFeedParsePlugIn {
098: /*----------------------------------------------------------------------*\
099: Private Constants
100: \*----------------------------------------------------------------------*/
101:
102: private static final String VAR_ALLOW_EMBEDDED_HTML = "AllowEmbeddedHTML";
103:
104: /*----------------------------------------------------------------------*\
105: Private Data Items
106: \*----------------------------------------------------------------------*/
107:
108: /**
109: * Feed save data, by feed
110: */
111: private Map<FeedInfo, Boolean> perFeedHTMLFlag = new HashMap<FeedInfo, Boolean>();
112:
113: /**
114: * The global default
115: */
116: private boolean allowHTMLDefault = false;
117:
118: /**
119: * For log messages
120: */
121: private static final Logger log = new Logger(
122: AllowEmbeddedHTMLPlugIn.class);
123:
124: /*----------------------------------------------------------------------*\
125: Constructor
126: \*----------------------------------------------------------------------*/
127:
128: /**
129: * Default constructor (required).
130: */
131: public AllowEmbeddedHTMLPlugIn() {
132: // Nothing to do
133: }
134:
135: /*----------------------------------------------------------------------*\
136: Public Methods Required by *PlugIn Interfaces
137: \*----------------------------------------------------------------------*/
138:
139: /**
140: * Get a displayable name for the plug-in.
141: *
142: * @return the name
143: */
144: public String getPlugInName() {
145: return "Allow Embedded HTML";
146: }
147:
148: /**
149: * Get the sort key for this plug-in.
150: *
151: * @return the sort key string.
152: */
153: public String getPlugInSortKey() {
154: return ClassUtil.getShortClassName(getClass().getName());
155: }
156:
157: /**
158: * Initialize the plug-in. This method is called before any of the
159: * plug-in methods are called.
160: *
161: * @throws CurnException on error
162: */
163: public void initPlugIn() throws CurnException {
164: }
165:
166: /**
167: * Called immediately after <i>curn</i> has read and processed a
168: * configuration item in the main [curn] configuration section. All
169: * configuration items are passed, one by one, to each loaded plug-in.
170: * If a plug-in class is not interested in a particular configuration
171: * item, this method should simply return without doing anything. Note
172: * that some configuration items may simply be variable assignment;
173: * there's no real way to distinguish a variable assignment from a
174: * blessed configuration item.
175: *
176: * @param sectionName the name of the configuration section where
177: * the item was found
178: * @param paramName the name of the parameter
179: * @param config the {@link CurnConfig} object
180: *
181: * @throws CurnException on error
182: *
183: * @see CurnConfig
184: */
185: public void runMainConfigItemPlugIn(String sectionName,
186: String paramName, CurnConfig config) throws CurnException {
187: try {
188: if (paramName.equals(VAR_ALLOW_EMBEDDED_HTML)) {
189: allowHTMLDefault = config.getRequiredBooleanValue(
190: sectionName, paramName);
191: }
192: }
193:
194: catch (ConfigurationException ex) {
195: throw new CurnException(ex);
196: }
197: }
198:
199: /**
200: * Called immediately after <i>curn</i> has read and processed a
201: * configuration item in a "feed" configuration section. All
202: * configuration items are passed, one by one, to each loaded plug-in.
203: * If a plug-in class is not interested in a particular configuration
204: * item, this method should simply return without doing anything. Note
205: * that some configuration items may simply be variable assignment;
206: * there's no real way to distinguish a variable assignment from a
207: * blessed configuration item.
208: *
209: * @param sectionName the name of the configuration section where
210: * the item was found
211: * @param paramName the name of the parameter
212: * @param config the active configuration
213: * @param feedInfo partially complete <tt>FeedInfo</tt> object
214: * for the feed. The URL is guaranteed to be
215: * present, but no other fields are.
216: *
217: * @return <tt>true</tt> to continue processing the feed,
218: * <tt>false</tt> to skip it
219: *
220: * @throws CurnException on error
221: *
222: * @see CurnConfig
223: * @see FeedInfo
224: * @see FeedInfo#getURL
225: */
226: public boolean runFeedConfigItemPlugIn(String sectionName,
227: String paramName, CurnConfig config, FeedInfo feedInfo)
228: throws CurnException {
229: try {
230: if (paramName.equals(VAR_ALLOW_EMBEDDED_HTML)) {
231: boolean flag = config.getRequiredBooleanValue(
232: sectionName, paramName);
233: perFeedHTMLFlag.put(feedInfo, flag);
234: log.debug("[" + sectionName + "]: " + paramName + "="
235: + flag);
236: }
237:
238: return true;
239: }
240:
241: catch (ConfigurationException ex) {
242: throw new CurnException(ex);
243: }
244: }
245:
246: /**
247: * Called immediately after a feed is parsed, but before it is
248: * otherwise processed. This method can return <tt>false</tt> to signal
249: * <i>curn</i> that the feed should be skipped. For instance, a plug-in
250: * that filters on the parsed feed data could use this method to weed
251: * out non-matching feeds before they are downloaded. Similarly, a
252: * plug-in that edits the parsed data (removing or editing individual
253: * items, for instance) could use method to do so.
254: *
255: * @param feedInfo the {@link FeedInfo} object for the feed that
256: * has been downloaded and parsed.
257: * @param feedCache the feed cache
258: * @param channel the parsed channel data
259: *
260: * @return <tt>true</tt> if <i>curn</i> should continue to process the
261: * feed, <tt>false</tt> to skip the feed. A return value of
262: * <tt>false</tt> aborts all further processing on the feed.
263: * In particular, <i>curn</i> will not pass the feed along to
264: * other plug-ins that have yet to be notified of this event.
265: *
266: * @throws CurnException on error
267: *
268: * @see RSSChannel
269: * @see FeedInfo
270: */
271: public boolean runPostFeedParsePlugIn(FeedInfo feedInfo,
272: FeedCache feedCache, RSSChannel channel)
273: throws CurnException {
274: Boolean allowBoxed = perFeedHTMLFlag.get(feedInfo);
275: boolean allow = allowHTMLDefault;
276:
277: if (allowBoxed != null)
278: allow = allowBoxed;
279:
280: if (!allow) {
281: log.debug("Stripping all HTML from RSS data for feed \""
282: + feedInfo.getURL().toString() + "\"");
283: channel.stripHTML();
284: }
285:
286: return true;
287: }
288: }
|