001: /*---------------------------------------------------------------------------*\
002: $Id: GzipDownloadPlugIn.java 7041 2007-09-09 01:04:47Z bmc $
003: ---------------------------------------------------------------------------
004: This software is released under a BSD-style license:
005:
006: Copyright (c) 2004-2007 Brian M. Clapper. All rights reserved.
007:
008: Redistribution and use in source and binary forms, with or without
009: modification, are permitted provided that the following conditions are
010: met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. The end-user documentation included with the redistribution, if any,
016: must include the following acknowlegement:
017:
018: "This product includes software developed by Brian M. Clapper
019: (bmc@clapper.org, http://www.clapper.org/bmc/). That software is
020: copyright (c) 2004-2007 Brian M. Clapper."
021:
022: Alternately, this acknowlegement may appear in the software itself,
023: if wherever such third-party acknowlegements normally appear.
024:
025: 3. Neither the names "clapper.org", "curn", nor any of the names of the
026: project contributors may be used to endorse or promote products
027: derived from this software without prior written permission. For
028: written permission, please contact bmc@clapper.org.
029:
030: 4. Products derived from this software may not be called "curn", nor may
031: "clapper.org" appear in their names without prior written permission
032: of Brian M. Clapper.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
036: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
037: NO EVENT SHALL BRIAN M. CLAPPER BE LIABLE FOR ANY DIRECT, INDIRECT,
038: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
039: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
040: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
041: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044: \*---------------------------------------------------------------------------*/
045:
046: package org.clapper.curn.plugins;
047:
048: import org.clapper.curn.CurnConfig;
049: import org.clapper.curn.CurnException;
050: import org.clapper.curn.FeedInfo;
051: import org.clapper.curn.MainConfigItemPlugIn;
052: import org.clapper.curn.FeedConfigItemPlugIn;
053: import org.clapper.curn.PreFeedDownloadPlugIn;
054: import org.clapper.curn.CurnUtil;
055:
056: import org.clapper.util.classutil.ClassUtil;
057: import org.clapper.util.config.ConfigurationException;
058: import org.clapper.util.logging.Logger;
059: import java.net.URLConnection;
060:
061: import java.util.Map;
062: import java.util.HashMap;
063:
064: /**
065: * The <tt>GzipDownloadPlugIn</tt> handles setting the global and
066: * per-feed HTTP header that requests gzipped (compressed) feed data
067: * (assuming the remote server honors that header). It intercepts the
068: * following configuration parameters:
069: *
070: * <table border="1">
071: * <tr valign="top" align="left">
072: * <th>Section</th>
073: * <th>Parameter</th>
074: * <th>Meaning</th>
075: * </tr>
076: * <tr valign="top">
077: * <td><tt>[curn]</tt></td>
078: * <td><tt>GzipDownload</tt></td>
079: * <td>The global default setting, if none is supplied in individual feed
080: * sections. Defaults to true.</td>
081: * </tr>
082: * <tr valign="top">
083: * <td><tt>[Feed<i>xxx</i>]</tt></td>
084: * <td><tt>GzipDownload</tt></td>
085: * <td>Whether or not to ask for gzipped data for a particular feed.
086: * Defaults to the global setting if not specified.</td>
087: * </tr>
088: * </table>
089: *
090: * @version <tt>$Revision: 7041 $</tt>
091: */
092: public class GzipDownloadPlugIn implements MainConfigItemPlugIn,
093: FeedConfigItemPlugIn, PreFeedDownloadPlugIn {
094: /*----------------------------------------------------------------------*\
095: Private Constants
096: \*----------------------------------------------------------------------*/
097:
098: private static final String VAR_OLD_GET_GZIPPED_FEEDS = "GetGzippedFeeds";
099: private static final String VAR_GZIP_DOWNLOAD = "GzipDownload";
100:
101: /*----------------------------------------------------------------------*\
102: Private Data Items
103: \*----------------------------------------------------------------------*/
104:
105: /**
106: * Feed save data, by feed
107: */
108: private Map<FeedInfo, Boolean> perFeedGzipFlag = new HashMap<FeedInfo, Boolean>();
109:
110: /**
111: * Default setting
112: */
113: private boolean requestGzipDefault = true;
114:
115: /**
116: * For log messages
117: */
118: private static final Logger log = new Logger(
119: GzipDownloadPlugIn.class);
120:
121: /*----------------------------------------------------------------------*\
122: Constructor
123: \*----------------------------------------------------------------------*/
124:
125: /**
126: * Default constructor (required).
127: */
128: public GzipDownloadPlugIn() {
129: // Nothing to do
130: }
131:
132: /*----------------------------------------------------------------------*\
133: Public Methods Required by *PlugIn Interfaces
134: \*----------------------------------------------------------------------*/
135:
136: /**
137: * Get a displayable name for the plug-in.
138: *
139: * @return the name
140: */
141: public String getPlugInName() {
142: return "Gzip Download";
143: }
144:
145: /**
146: * Get the sort key for this plug-in.
147: *
148: * @return the sort key string.
149: */
150: public String getPlugInSortKey() {
151: return ClassUtil.getShortClassName(getClass().getName());
152: }
153:
154: /**
155: * Initialize the plug-in. This method is called before any of the
156: * plug-in methods are called.
157: *
158: * @throws CurnException on error
159: */
160: public void initPlugIn() throws CurnException {
161: }
162:
163: /**
164: * Called immediately after <i>curn</i> has read and processed a
165: * configuration item in the main [curn] configuration section. All
166: * configuration items are passed, one by one, to each loaded plug-in.
167: * If a plug-in class is not interested in a particular configuration
168: * item, this method should simply return without doing anything. Note
169: * that some configuration items may simply be variable assignment;
170: * there's no real way to distinguish a variable assignment from a
171: * blessed configuration item.
172: *
173: * @param sectionName the name of the configuration section where
174: * the item was found
175: * @param paramName the name of the parameter
176: * @param config the {@link CurnConfig} object
177: *
178: * @throws CurnException on error
179: *
180: * @see CurnConfig
181: */
182: public void runMainConfigItemPlugIn(String sectionName,
183: String paramName, CurnConfig config) throws CurnException {
184: try {
185: if (paramName.equals(VAR_GZIP_DOWNLOAD)) {
186: requestGzipDefault = config.getRequiredBooleanValue(
187: sectionName, paramName);
188: }
189:
190: else if (paramName.equals(VAR_OLD_GET_GZIPPED_FEEDS)) {
191: String msg = config.getDeprecatedParamMessage(
192: paramName, VAR_GZIP_DOWNLOAD);
193: CurnUtil.getErrorOut().println(msg);
194: log.warn(msg);
195:
196: requestGzipDefault = config.getRequiredBooleanValue(
197: sectionName, paramName);
198: }
199: }
200:
201: catch (ConfigurationException ex) {
202: throw new CurnException(ex);
203: }
204: }
205:
206: /**
207: * Called immediately after <i>curn</i> has read and processed a
208: * configuration item in a "feed" configuration section. All
209: * configuration items are passed, one by one, to each loaded plug-in.
210: * If a plug-in class is not interested in a particular configuration
211: * item, this method should simply return without doing anything. Note
212: * that some configuration items may simply be variable assignment;
213: * there's no real way to distinguish a variable assignment from a
214: * blessed configuration item.
215: *
216: * @param sectionName the name of the configuration section where
217: * the item was found
218: * @param paramName the name of the parameter
219: * @param config the active configuration
220: * @param feedInfo partially complete <tt>FeedInfo</tt> object
221: * for the feed. The URL is guaranteed to be
222: * present, but no other fields are.
223: *
224: * @return <tt>true</tt> to continue processing the feed,
225: * <tt>false</tt> to skip it
226: *
227: * @throws CurnException on error
228: *
229: * @see CurnConfig
230: * @see FeedInfo
231: * @see FeedInfo#getURL
232: */
233: public boolean runFeedConfigItemPlugIn(String sectionName,
234: String paramName, CurnConfig config, FeedInfo feedInfo)
235: throws CurnException {
236: try {
237: if (paramName.equals(VAR_GZIP_DOWNLOAD)) {
238: boolean flag = config.getRequiredBooleanValue(
239: sectionName, paramName);
240: perFeedGzipFlag.put(feedInfo, flag);
241: log.debug("[" + sectionName + "]: " + paramName + "="
242: + flag);
243: }
244:
245: else if (paramName.equals(VAR_OLD_GET_GZIPPED_FEEDS)) {
246: String msg = config.getDeprecatedParamMessage(
247: paramName, VAR_GZIP_DOWNLOAD);
248: CurnUtil.getErrorOut().println(msg);
249: log.warn(msg);
250:
251: boolean flag = config.getRequiredBooleanValue(
252: sectionName, paramName);
253: perFeedGzipFlag.put(feedInfo, flag);
254: log.debug("[" + sectionName + "]: " + paramName + "="
255: + flag);
256: }
257:
258: return true;
259: }
260:
261: catch (ConfigurationException ex) {
262: throw new CurnException(ex);
263: }
264: }
265:
266: /**
267: * <p>Called just before a feed is downloaded. This method can return
268: * <tt>false</tt> to signal <i>curn</i> that the feed should be
269: * skipped. The plug-in method can also set values on the
270: * <tt>URLConnection</tt> used to download the plug-in, via
271: * <tt>URL.setRequestProperty()</tt>. (Note that <i>all</i> URLs, even
272: * <tt>file:</tt> URLs, are passed into this method. Setting a request
273: * property on the <tt>URLConnection</tt> object for a <tt>file:</tt>
274: * URL will have no effect--though it isn't specifically harmful.)</p>
275: *
276: * <p>Possible uses for a pre-feed download plug-in include:</p>
277: *
278: * <ul>
279: * <li>filtering on feed URL to prevent downloading non-matching feeds
280: * <li>changing the default User-Agent value
281: * <li>setting a non-standard HTTP header field
282: * </ul>
283: *
284: * @param feedInfo the {@link FeedInfo} object for the feed to be
285: * downloaded
286: * @param urlConn the <tt>java.net.URLConnection</tt> object that will
287: * be used to download the feed's XML.
288: *
289: * @return <tt>true</tt> if <i>curn</i> should continue to process the
290: * feed, <tt>false</tt> to skip the feed
291: *
292: * @throws CurnException on error
293: *
294: * @see FeedInfo
295: */
296: public boolean runPreFeedDownloadPlugIn(FeedInfo feedInfo,
297: URLConnection urlConn) throws CurnException {
298: Boolean gzipBoxed = perFeedGzipFlag.get(feedInfo);
299: boolean gzip = requestGzipDefault;
300:
301: if (gzipBoxed != null)
302: gzip = gzipBoxed;
303:
304: if (gzip) {
305: log.debug("Setting header \"Accept-Encoding\" to \"gzip\" "
306: + "for feed \"" + feedInfo.getURL() + "\"");
307:
308: urlConn.setRequestProperty("Accept-Encoding", "gzip");
309: }
310:
311: return true;
312: }
313:
314: /*----------------------------------------------------------------------*\
315: Private Methods
316: \*----------------------------------------------------------------------*/
317: }
|