001: /*
002: * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * 1. Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: * 2. Redistributions in binary form must reproduce the above copyright notice,
010: * this list of conditions and the following disclaimer in the documentation
011: * and/or other materials provided with the distribution.
012: * 3. The end-user documentation included with the redistribution, if any, must
013: * include the following acknowledgment:
014: *
015: * "This product includes software developed by Gargoyle Software Inc.
016: * (http://www.GargoyleSoftware.com/)."
017: *
018: * Alternately, this acknowledgment may appear in the software itself, if
019: * and wherever such third-party acknowledgments normally appear.
020: * 4. The name "Gargoyle Software" must not be used to endorse or promote
021: * products derived from this software without prior written permission.
022: * For written permission, please contact info@GargoyleSoftware.com.
023: * 5. Products derived from this software may not be called "HtmlUnit", nor may
024: * "HtmlUnit" appear in their name, without prior written permission of
025: * Gargoyle Software Inc.
026: *
027: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
028: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
029: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
030: * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
031: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
032: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
033: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
034: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
035: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
036: * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
037: */
038: package com.gargoylesoftware.htmlunit;
039:
040: import java.io.Serializable;
041: import java.util.Collections;
042: import java.util.Date;
043: import java.util.HashMap;
044: import java.util.Map;
045:
046: import org.apache.commons.httpclient.util.DateParseException;
047: import org.apache.commons.httpclient.util.DateUtil;
048: import org.apache.commons.lang.math.NumberUtils;
049: import org.apache.commons.lang.time.DateUtils;
050:
051: /**
052: * <p>Simple cache implementation.</p>
053: *
054: * <p>The current implementation's main purpose is to provide the ability to cache <tt>.js</tt> files.</p>
055: *
056: * @version $Revision: 2132 $
057: * @author Marc Guillemot
058: * @author Daniel Gredler
059: */
060: public class Cache implements Serializable {
061:
062: private static final long serialVersionUID = -3864114727885057419L;
063:
064: private int maxSize_ = 20;
065: private final Map entries_ = Collections
066: .synchronizedMap(new HashMap(maxSize_));
067:
068: /**
069: * A cache entry.
070: */
071: private class Entry implements Comparable {
072: private final WebResponse response_;
073: private long lastAccess_;
074:
075: Entry(final WebResponse response) {
076: response_ = response;
077: lastAccess_ = System.currentTimeMillis();
078: }
079:
080: public int compareTo(final Object other) {
081: return NumberUtils.compare(lastAccess_,
082: ((Entry) other).lastAccess_);
083: }
084:
085: /**
086: * Updates the last access date
087: */
088: public void touch() {
089: lastAccess_ = System.currentTimeMillis();
090: }
091: }
092:
093: /**
094: * Cache the response if needed. The current implementation only caches JavaScript files.
095: *
096: * @param request the request
097: * @param response the response
098: */
099: public void cacheIfNeeded(final WebRequestSettings request,
100: final WebResponse response) {
101: if (isCacheable(request, response)) {
102: entries_.put(response.getUrl(), new Entry(response));
103: deleteOverflow();
104: }
105: }
106:
107: /**
108: * Truncates the cache to the maximal number of entries.
109: */
110: protected void deleteOverflow() {
111: synchronized (entries_) {
112: while (entries_.size() > maxSize_) {
113: final Entry oldestEntry = (Entry) Collections
114: .min(entries_.values());
115: entries_.remove(oldestEntry.response_.getUrl());
116: }
117: }
118: }
119:
120: /**
121: * Determines if the response should be cached.
122: *
123: * @param request the performed request
124: * @param response the received response
125: * @return <code>true</code> if the response should be cached
126: */
127: protected boolean isCacheable(final WebRequestSettings request,
128: final WebResponse response) {
129: return SubmitMethod.GET.equals(response.getRequestMethod())
130: && isJavaScript(response)
131: && !isDynamicContent(response);
132: }
133:
134: /**
135: * <p>Tries to guess if the content is dynamic or not.</p>
136: *
137: * <p>"Since origin servers do not always provide explicit expiration times, HTTP caches typically
138: * assign heuristic expiration times, employing algorithms that use other header values (such as the
139: * <tt>Last-Modified</tt> time) to estimate a plausible expiration time".</p>
140: *
141: * <p>The current implementation considers as dynamic content everything except responses with a
142: * <tt>Last-Modified</tt> header with a date older than 10 minutes or with an <tt>Expires</tt> header
143: * specifying expiration in more than 10 minutes.</p>
144: *
145: * @see <a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html">
146: * @param response the response to examine
147: * @return <code>true</code> if the response should be considered as dynamic and therefore uncacheable
148: */
149: protected boolean isDynamicContent(final WebResponse response) {
150: final Date lastModified = parseDateHeader(response,
151: "Last-Modified");
152: final Date expires = parseDateHeader(response, "Expires");
153:
154: final long delay = 10 * DateUtils.MILLIS_PER_MINUTE;
155: final long now = System.currentTimeMillis();
156:
157: final boolean cacheableContent = (expires != null
158: && (expires.getTime() - now > delay) || (expires == null
159: && lastModified != null && (now
160: - lastModified.getTime() > delay)));
161:
162: return !cacheableContent;
163: }
164:
165: /**
166: * Parses and returns the specified date header of the specified response. This method
167: * returns <tt>null</tt> if the specified header cannot be found or cannot be parsed as
168: * a date.
169: *
170: * @param response the response
171: * @param headerName the header name
172: * @return the specified date header of the specified response
173: */
174: protected Date parseDateHeader(final WebResponse response,
175: final String headerName) {
176: final String value = response
177: .getResponseHeaderValue(headerName);
178: Date date = null;
179: if (value != null) {
180: try {
181: date = DateUtil.parseDate(value);
182: } catch (final DateParseException e) {
183: date = null;
184: }
185: }
186: return date;
187: }
188:
189: /**
190: * Indicates if the provided response is JavaScript content.
191: *
192: * @param webResponse the response to analyze
193: * @return <code>true</code> if it can be considered as JavaScript
194: */
195: protected boolean isJavaScript(final WebResponse webResponse) {
196: final String contentType = webResponse.getContentType()
197: .toLowerCase();
198:
199: // many web applications are badly configured and have wrong headers, look at file extension too
200: return "text/javascript".equals(contentType)
201: || "application/x-javascript".equals(contentType)
202: || webResponse.getUrl().getPath().endsWith(".js");
203: }
204:
205: /**
206: * Returns the cached content corresponding to the specified request. If there is
207: * no corresponding cached content, this method returns <tt>null</tt>.
208: *
209: * @param request the request whose cached content is sought
210: * @return the cached content corresponding to the specified request
211: */
212: public WebResponse getCachedContent(final WebRequestSettings request) {
213: if (!SubmitMethod.GET.equals(request.getSubmitMethod())) {
214: return null;
215: }
216: final Entry cachedEntry = (Entry) entries_
217: .get(request.getURL());
218: if (cachedEntry == null) {
219: return null;
220: } else {
221: synchronized (entries_) {
222: cachedEntry.touch();
223: }
224: return cachedEntry.response_;
225: }
226: }
227:
228: /**
229: * Returns the cache's maximum size. This is the maximum number of files that will
230: * be cached. The default is <tt>20</tt>.
231: *
232: * @return the cache's maximum size
233: */
234: public int getMaxSize() {
235: return maxSize_;
236: }
237:
238: /**
239: * Sets the cache's maximum size. This is the maximum number of files that will
240: * be cached. The default is <tt>20</tt>.
241: *
242: * @param maxSize the cache's maximum size (must be >= 0)
243: */
244: public void setMaxSize(final int maxSize) {
245: if (maxSize < 0) {
246: throw new IllegalArgumentException(
247: "Illegal value for maxSize: " + maxSize);
248: }
249: maxSize_ = maxSize;
250: deleteOverflow();
251: }
252:
253: /**
254: * Returns the number of entries in the cache.
255: *
256: * @return the number of entries in the cache
257: */
258: public int getSize() {
259: return entries_.size();
260: }
261:
262: }
|