001: //
002: // Informa -- RSS Library for Java
003: // Copyright (c) 2002 by Niko Schmuck
004: //
005: // Niko Schmuck
006: // http://sourceforge.net/projects/informa
007: // mailto:niko_schmuck@users.sourceforge.net
008: //
009: // This library is free software.
010: //
011: // You may redistribute it and/or modify it under the terms of the GNU
012: // Lesser General Public License as published by the Free Software Foundation.
013: //
014: // Version 2.1 of the license should be included with this distribution in
015: // the file LICENSE. If the license is not included with this distribution,
016: // you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
017: // or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge,
018: // MA 02139 USA.
019: //
020: // This library is distributed in the hope that it will be useful,
021: // but WITHOUT ANY WARRANTY; without even the implied warranty of
022: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
023: // Lesser General Public License for more details.
024: //
025: // $Id: FeedManagerEntry.java,v 1.18 2006/12/04 23:43:27 italobb Exp $
026:
027: package de.nava.informa.utils;
028:
029: import de.nava.informa.core.ChannelBuilderIF;
030: import de.nava.informa.core.ChannelIF;
031: import de.nava.informa.core.ChannelUpdatePeriod;
032: import de.nava.informa.core.FeedIF;
033: import de.nava.informa.core.ParseException;
034: import de.nava.informa.impl.basic.Feed;
035: import de.nava.informa.parsers.FeedParser;
036:
037: import java.io.IOException;
038:
039: import java.net.URL;
040: import java.net.HttpURLConnection;
041: import java.net.URLConnection;
042:
043: import java.util.Date;
044:
045: import org.apache.commons.logging.Log;
046: import org.apache.commons.logging.LogFactory;
047:
048: /**
049: * Holder class for feeds held in the manager. The purpose of this class is to
050: * store the last time we loaded the feed, and determine if the feed needs to be
051: * reread. Whilst we use the data provided by the feed where possible, if this
052: * is not present defaults will be used.
053: * <p>
054: * Its also important to note that we do oversimply things a bit. We ignore the
055: * updateBase even if specified by the feed.
056: * </p>
057: *
058: * @author Sam Newman
059: * @see FeedManager
060: */
061: public class FeedManagerEntry {
062:
063: public static final long MILLISECONDS_IN_HOUR = 3600000L;
064:
065: public static final long MILLISECONDS_IN_DAY = 86400000L;
066:
067: public static final long MILLISECONDS_IN_MONTH = 2419200000L;
068:
069: /** Over simplificatin here - assuming a non-leap year */
070: public static final long MILLISECONDS_IN_YEAR = 31536000000L;
071:
072: /* logger handler */
073: private static Log logger = LogFactory
074: .getLog(FeedManagerEntry.class);
075:
076: private ChannelUpdatePeriod defaultUpdatePeriod;
077:
078: private int defaultUpdateFrequency;
079:
080: /**
081: * Stores the number of milliseconds since the last update after which the
082: * feed is out of date
083: */
084: private long timeToExpire;
085:
086: /** The channel we hold */
087: private FeedIF feed;
088:
089: /** The last time we updated a feed */
090: private long lastUpdate;
091:
092: /** The URI for the feed */
093: private String feedUri;
094:
095: private ChannelBuilderIF channelBuilder;
096:
097: /** the wantedTtl for the feed * */
098: private long wantedTtl = -1;
099:
100: /** stores the values necessary to make conditionnale GET * */
101: private ConditionalGetValues httpHeaders = new ConditionalGetValues();
102:
103: /**
104: * Creates a new FeedManagerEntry object.
105: *
106: * @param feedUri
107: * @param builder
108: * @param defaultUpdatePeriod2
109: * @param defaultUpdateFrequency
110: * @throws FeedManagerException
111: */
112: public FeedManagerEntry(String feedUri, ChannelBuilderIF builder,
113: ChannelUpdatePeriod defaultUpdatePeriod2,
114: int defaultUpdateFrequency) throws FeedManagerException {
115: this .feedUri = feedUri;
116: this .channelBuilder = builder;
117: this .defaultUpdatePeriod = defaultUpdatePeriod2;
118: this .defaultUpdateFrequency = defaultUpdateFrequency;
119: this .feed = retrieveFeed(feedUri);
120: this .lastUpdate = System.currentTimeMillis();
121: }
122:
123: public ChannelUpdatePeriod getDefaultUpdatePeriod() {
124: return defaultUpdatePeriod;
125: }
126:
127: public void setDefaultUpdatePeriod(
128: ChannelUpdatePeriod defaultUpdatePeriod) {
129: this .defaultUpdatePeriod = defaultUpdatePeriod;
130: }
131:
132: public int getDefaultUpdateFrequency() {
133: return defaultUpdateFrequency;
134: }
135:
136: public void setDefaultUpdateFrequency(int defaultUpdateFrequency) {
137: this .defaultUpdateFrequency = defaultUpdateFrequency;
138: }
139:
140: /**
141: * Loads the channel and sets up the time to expire
142: *
143: * @param uri
144: * The location for the rss file
145: * @return The Channel
146: * @throws FeedManagerException
147: * If the feed specified by <code>uri</code> is invalid
148: */
149: private FeedIF retrieveFeed(String uri) throws FeedManagerException {
150: try {
151: URL urlToRetrieve = new URL(uri);
152:
153: URLConnection conn = null;
154: try {
155: conn = urlToRetrieve.openConnection();
156:
157: if (conn instanceof HttpURLConnection) {
158:
159: HttpURLConnection httpConn = (HttpURLConnection) conn;
160:
161: httpConn.setInstanceFollowRedirects(true); // not needed, default ?
162:
163: // Hack for User-Agent : problem for
164: // http://www.diveintomark.org/xml/rss.xml
165: HttpHeaderUtils.setUserAgent(httpConn,
166: "Informa Java API");
167:
168: logger
169: .debug("retr feed at url "
170: + uri
171: + ": ETag"
172: + HttpHeaderUtils
173: .getETagValue(httpConn)
174: + " if-modified :"
175: + HttpHeaderUtils
176: .getLastModified(httpConn));
177:
178: // get initial values for cond. GET in updateChannel
179: this .httpHeaders.setETag(HttpHeaderUtils
180: .getETagValue(httpConn));
181: this .httpHeaders.setIfModifiedSince(HttpHeaderUtils
182: .getLastModified(httpConn));
183: }
184: } catch (java.lang.ClassCastException e) {
185: conn = null;
186: logger.warn("problem cast to HttpURLConnection " + uri,
187: e);
188: throw new FeedManagerException(e);
189: } catch (NullPointerException e) {
190: logger.error("problem NPE " + uri + " conn=" + conn, e);
191: conn = null;
192: throw new FeedManagerException(e);
193: }
194:
195: ChannelIF channel = null;
196: /*
197: * if ( conn == null ) { channel = FeedParser.parse(getChannelBuilder(),
198: * uri); } else {
199: */
200: channel = FeedParser.parse(getChannelBuilder(), conn
201: .getInputStream());
202: //}
203:
204: this .timeToExpire = getTimeToExpire(channel);
205: this .feed = new Feed(channel);
206:
207: Date currDate = new Date();
208: this .feed.setLastUpdated(currDate);
209: this .feed.setDateFound(currDate);
210: this .feed.setLocation(urlToRetrieve);
211: logger.info("feed retrieved " + uri);
212:
213: } catch (IOException e) {
214: logger.error("IOException " + feedUri + " e=" + e);
215: e.printStackTrace();
216: throw new FeedManagerException(e);
217: } catch (ParseException e) {
218: e.printStackTrace();
219: throw new FeedManagerException(e);
220: }
221:
222: return this .feed;
223: }
224:
225: /**
226: * Updates the channel associated with this feed use conditional get stuff.
227: * http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers
228: *
229: * @throws FeedManagerException
230: */
231: private synchronized void updateChannel()
232: throws FeedManagerException {
233: try {
234: String feedUrl = this .feed.getLocation().toString();
235:
236: URL aURL = null;
237: try {
238: aURL = new URL(feedUrl);
239: } catch (java.net.MalformedURLException e) {
240: logger.error("Could not create URL for " + feedUrl);
241: }
242:
243: URLConnection conn = null;
244: try {
245: conn = aURL.openConnection();
246:
247: if (conn instanceof HttpURLConnection) {
248:
249: HttpURLConnection httpConn = (HttpURLConnection) conn;
250:
251: httpConn.setInstanceFollowRedirects(true);
252: // Hack for User-Agent : problem for
253: // http://www.diveintomark.org/xml/rss.xml
254: HttpHeaderUtils.setUserAgent(httpConn,
255: "Informa Java API");
256: HttpHeaderUtils.setETagValue(httpConn,
257: this .httpHeaders.getETag());
258: HttpHeaderUtils.setIfModifiedSince(httpConn,
259: this .httpHeaders.getIfModifiedSince());
260: httpConn.connect();
261: if (httpConn.getResponseCode() == HttpURLConnection.HTTP_NOT_MODIFIED) {
262:
263: logger.info("cond. GET for feed at url "
264: + feedUrl + ": no change");
265: this .feed.setLastUpdated(new Date());
266: // TODO : add a property in FeedIF interface for lastGet ?
267: this .lastUpdate = System.currentTimeMillis();
268: return;
269: }
270: logger.info("cond. GET for feed at url " + feedUrl
271: + ": changed");
272: logger
273: .debug("feed at url "
274: + feedUrl
275: + " new values : ETag"
276: + HttpHeaderUtils
277: .getETagValue(httpConn)
278: + " if-modified :"
279: + HttpHeaderUtils
280: .getLastModified(httpConn));
281:
282: this .httpHeaders.setETag(HttpHeaderUtils
283: .getETagValue(httpConn));
284: this .httpHeaders.setIfModifiedSince(HttpHeaderUtils
285: .getLastModified(httpConn));
286: }
287:
288: } catch (java.lang.ClassCastException e) {
289: logger.warn(
290: "problem cast to HttpURLConnection (reading from a file?) "
291: + feedUrl, e);
292: }
293:
294: ChannelIF channel = null;
295: if (conn == null) {
296: channel = FeedParser
297: .parse(getChannelBuilder(), feedUrl);
298: } else {
299: channel = FeedParser.parse(getChannelBuilder(), conn
300: .getInputStream());
301: }
302:
303: this .feed.setChannel(channel);
304: this .feed.setLastUpdated(new Date());
305: this .lastUpdate = System.currentTimeMillis();
306: logger.info("feed updated " + feedUrl);
307: } catch (IOException e) {
308: throw new FeedManagerException(e);
309: } catch (ParseException e) {
310: throw new FeedManagerException(e);
311: }
312: }
313:
314: /**
315: * Checks to see if the feed is out of date - if it is the feed is reloaded
316: * from the URI, otherwise the cached version is returned.
317: *
318: * @return The up todate feed
319: * @throws FeedManagerException
320: */
321: public FeedIF getFeed() throws FeedManagerException {
322: if (isOutOfDate()) {
323: updateChannel();
324: }
325: return this .feed;
326: }
327:
328: public void setWantedTtl(long ms) {
329: this .wantedTtl = ms;
330: //recalculate the timeToExpire
331: this .timeToExpire = this
332: .getTimeToExpire(this .feed.getChannel());
333: }
334:
335: /**
336: * Based on the update period and update frequceny and on the optional
337: * wantedTtl for the feed, calculate how many milliseconds after the
338: * <code>lastUpdate</code> before this feed is considered out of date
339: *
340: * @param channel
341: * @return The number of milliseconds before we can consider the feed invalid
342: * @throws IllegalArgumentException
343: */
344: private long getTimeToExpire(ChannelIF channel) {
345: long temp = (new CacheSettings()).getTtl(channel,
346: this .wantedTtl);
347: return temp;
348: }
349:
350: /**
351: * Determines if the feed is out of date.
352: *
353: * @return false if the feed is up to date, else true
354: */
355: private boolean isOutOfDate() {
356: boolean outOfDate = false;
357: logger.info(this + " isOutOfDate " + this .feedUri + "lupdt: "
358: + lastUpdate + ",tte=" + timeToExpire + "<?"
359: + (System.currentTimeMillis() - lastUpdate));
360: if ((lastUpdate + timeToExpire) < System.currentTimeMillis()) {
361: outOfDate = true;
362: }
363: return outOfDate;
364: }
365:
366: private ChannelBuilderIF getChannelBuilder() {
367: return channelBuilder;
368: }
369:
370: }
|