/*
 * Copyright (c) 2001 by Matt Welsh and The Regents of the University of
 * California. All rights reserved.
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose, without fee, and without written agreement is
 * hereby granted, provided that the above copyright notice and the following
 * two paragraphs appear in all copies of this software.
 *
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
 * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Author: Matt Welsh <mdw@cs.berkeley.edu>
 *
 */

package seda.apps.Haboob.cache;

import seda.apps.Haboob.*;
import seda.sandStorm.api.*;
import seda.sandStorm.core.*;
import seda.sandStorm.lib.http.*;
import seda.sandStorm.lib.aSocket.*;
import seda.sandStorm.lib.aDisk.*;
import seda.util.*;
import java.io.*;
import java.util.*;

/**
 * This implementation of the Haboob page cache caches recently
 * accessed Web pages and evicts pages at random when the cache grows
 * too large. It does not perform as well as PageCacheSized.
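 *
 * <p>Configuration parameters read in init(): "defaultURL" (the index
 * file served when a request names a directory), "rootDir" (the
 * document root prepended to request URLs), and "maxCacheSize" (the
 * cache bound in kilobytes, or -1 for no bound). As a purely
 * hypothetical example, a deployment might set defaultURL to
 * "index.html", rootDir to "/usr/local/htdocs", and maxCacheSize to
 * 10240 (10 MB); the actual values depend on the site being served.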
 */
public class PageCache implements EventHandlerIF, HaboobConst {

  private static final boolean DEBUG = false;
  private static final boolean PROFILE = false;

  // Don't actually read file; just store empty buffer in cache
  private static final boolean DEBUG_NO_FILE_READ = false;
  // Don't even stat file; just allocate buffer of fixed size
  private static final boolean DEBUG_NO_FILE_READ_SAMESIZE = false;
  private static final int DEBUG_NO_FILE_READ_SAMESIZE_SIZE = 8192;

  // Rewrite incoming filename so all cache entries hit
  private static final boolean DEBUG_SINGLE_CACHE_PAGE = false;
  // If true, rewrite all request URLs to DEBUG_SINGLE_CACHE_PAGE_FNAME;
  // if false, all cache misses access the same file, but as different entries
  private static final boolean DEBUG_SINGLE_CACHE_PAGE_SAMENAME = false;
  // This file is of size 8192 bytes
  private static final String DEBUG_SINGLE_CACHE_PAGE_FNAME = "/dir00000/class1_7";

  private String DEFAULT_URL;
  private String ROOT_DIR;

  private SinkIF mysink, sendSink;
  private Hashtable pageTbl;  // Map URL -> cacheEntry
  private Hashtable aFileTbl; // Map aFile -> cacheEntry
  private int maxCacheSize;
  private Random rand;

  private Hashtable mimeTbl; // Filename extension -> MIME type
  private static final String defaultMimeType = "text/plain";

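  /**
   * Stage initialization: resolve the sink for the HTTP send stage, set
   * up the page and file tables, build the extension-to-MIME-type map,
   * and read the configuration parameters described above.
   */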
  public void init(ConfigDataIF config) throws Exception {
    mysink = config.getStage().getSink();
    sendSink = config.getManager().getStage(HTTP_SEND_STAGE).getSink();
    pageTbl = new Hashtable();
    aFileTbl = new Hashtable();
    rand = new Random();

    mimeTbl = new Hashtable();
    mimeTbl.put(".html", "text/html");
    mimeTbl.put(".gif", "image/gif");
    mimeTbl.put(".jpg", "image/jpeg");
    mimeTbl.put(".jpeg", "image/jpeg");
    mimeTbl.put(".pdf", "application/pdf");

    DEFAULT_URL = config.getString("defaultURL");
    if (DEFAULT_URL == null)
      throw new IllegalArgumentException("Must specify defaultURL");
    ROOT_DIR = config.getString("rootDir");
    if (ROOT_DIR == null)
      throw new IllegalArgumentException("Must specify rootDir");
    maxCacheSize = config.getInt("maxCacheSize");
  }

  public void destroy() {
  }

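  /**
   * Dispatch on event type: httpRequest events are looked up in the page
   * table (starting a miss fill if absent), AFileIOCompleted events
   * complete a pending file read, and SinkClosedEvents are ignored.
   */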
  public void handleEvent(QueueElementIF item) {
    if (DEBUG) System.err.println("PageCache: GOT QEL: " + item);

    if (item instanceof httpRequest) {
      HaboobStats.numRequests++;

      httpRequest req = (httpRequest) item;
      if (req.getRequest() != httpRequest.REQUEST_GET) {
        HaboobStats.numErrors++;
        sendSink.enqueue_lossy(new httpResponder(
            new httpBadRequestResponse(req, "Only GET requests supported at this time"),
            req, true));
        return;
      }

      String url;
      if (DEBUG_SINGLE_CACHE_PAGE && DEBUG_SINGLE_CACHE_PAGE_SAMENAME) {
        url = DEBUG_SINGLE_CACHE_PAGE_FNAME;
      } else {
        url = req.getURL();
      }

      cacheEntry entry;
      synchronized (pageTbl) {
        long t1 = 0, t2;
        if (PROFILE) t1 = System.currentTimeMillis();
        entry = (cacheEntry) pageTbl.get(url);
        if (PROFILE) {
          t2 = System.currentTimeMillis();
          HaboobStats.numCacheLookup++;
          HaboobStats.timeCacheLookup += (t2 - t1);
        }

        if (entry == null) {
          // Miss: open the file and start an asynchronous read
          handleCacheMiss(req);
        }
      }
      if (entry != null) {
        synchronized (entry) {
          if (entry.pending) {
            // Another request is already filling this entry; wait for it
            entry.addWaiter(req);
          } else {
            HaboobStats.numCacheHits++;
            entry.send(req);
          }
        }
      }

    } else if (item instanceof AFileIOCompleted) {
      AFileIOCompleted comp = (AFileIOCompleted) item;
      AFile af = comp.getFile();
      cacheEntry entry = (cacheEntry) aFileTbl.get(af);
      if (entry == null) {
        throw new RuntimeException(
            "PageCache: WARNING: Got AFileIOCompleted for non-entry: " + comp);
      }
      if (comp.sizeCompleted != entry.length) {
        throw new RuntimeException("PageCache: WARNING: Got " + comp.sizeCompleted
            + " bytes read, expecting " + entry.length);
      }
      af.close();
      aFileTbl.remove(af);
      entry.done();

    } else if (item instanceof SinkClosedEvent) {
      // Ignore

    } else {
      System.err.println("PageCache: Got unknown event type: " + item);
    }
  }

  public void handleEvents(QueueElementIF items[]) {
    for (int i = 0; i < items.length; i++) {
      handleEvent(items[i]);
    }
  }

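  /**
   * Handle a cache miss: open and stat the file (falling back to
   * DEFAULT_URL for directories), allocate a response buffer of the
   * right size, install a pending cacheEntry, evict if over budget, and
   * enqueue an asynchronous read. The read completes with an
   * AFileIOCompleted event handled above.
   */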
  private void handleCacheMiss(httpRequest req) {
    String url;
    String fname;
    long t1 = 0, t2;

    if (DEBUG_SINGLE_CACHE_PAGE) {
      if (DEBUG_SINGLE_CACHE_PAGE_SAMENAME) {
        // Rewrite url
        url = DEBUG_SINGLE_CACHE_PAGE_FNAME;
        fname = ROOT_DIR + url;
      } else {
        // Rewrite fname, not url
        url = req.getURL();
        fname = ROOT_DIR + DEBUG_SINGLE_CACHE_PAGE_FNAME;
      }
    } else {
      url = req.getURL();
      fname = ROOT_DIR + url;
    }

    AFile af = null;
    AFileStat stat;
    BufferElement payload = null;
    httpOKResponse resp;
    cacheEntry entry;

    if (DEBUG_NO_FILE_READ && DEBUG_NO_FILE_READ_SAMESIZE) {
      resp = new httpOKResponse(getMimeType(fname), DEBUG_NO_FILE_READ_SAMESIZE_SIZE);
      entry = new cacheEntry(req, resp, null, DEBUG_NO_FILE_READ_SAMESIZE_SIZE);

    } else {

      // Open file and stat it to determine size
      try {
        af = new AFile(fname, mysink, false, true);
        stat = af.stat();
        if (stat.isDirectory) {
          af.close();
          fname = fname + "/" + DEFAULT_URL;
          af = new AFile(fname, mysink, false, true);
          stat = af.stat();
        }

        if (PROFILE) t1 = System.currentTimeMillis();
        resp = new httpOKResponse(getMimeType(fname), (int) stat.length);
        if (PROFILE) {
          t2 = System.currentTimeMillis();
          HaboobStats.numCacheAllocate++;
          HaboobStats.timeCacheAllocate += (t2 - t1);
        }

        payload = resp.getPayload();

      } catch (IOException ioe) {
        // File not found
        System.err.println("PageCache: Could not open file " + fname + ": " + ioe);
        HaboobStats.numErrors++;
        httpNotFoundResponse notfound = new httpNotFoundResponse(req, ioe.getMessage());
        sendSink.enqueue_lossy(new httpResponder(notfound, req, true));
        return;
      }

      entry = new cacheEntry(req, resp, af, (int) stat.length);
    }

    // Unless both no-read flags are set, af is a real open file; index it
    // so the AFileIOCompleted event can be matched back to this entry
    if (!DEBUG_NO_FILE_READ || !DEBUG_NO_FILE_READ_SAMESIZE) {
      aFileTbl.put(af, entry);
    }
    pageTbl.put(url, entry);

    if ((maxCacheSize != -1)
        && (HaboobStats.cacheSizeBytes > maxCacheSize * 1024)) {
      if (PROFILE) t1 = System.currentTimeMillis();
      rejectCacheEntry();
      if (PROFILE) {
        t2 = System.currentTimeMillis();
        HaboobStats.numCacheReject++;
        HaboobStats.timeCacheReject += (t2 - t1);
      }
    }

    if (!DEBUG_NO_FILE_READ) {
      try {
        af.read(payload);
      } catch (SinkException se) {
        // XXX Should not really happen
        System.err.println("PageCache: Got SinkException attempting read on "
            + fname + ": " + se);
        aFileTbl.remove(af);
        af.close();
        HaboobStats.numErrors++;
        httpNotFoundResponse notfound = new httpNotFoundResponse(req, se.getMessage());
        sendSink.enqueue_lossy(new httpResponder(notfound, req, true));
        return;
      }
    } else {
      // Pretend we got it already
      if (!DEBUG_NO_FILE_READ_SAMESIZE) {
        af.close();
        aFileTbl.remove(af);
      }
      entry.done();
    }

    HaboobStats.numCacheMisses++;
  }

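  // Map a filename extension to a MIME type; fall back to text/plain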
  private String getMimeType(String url) {
    Enumeration e = mimeTbl.keys();
    while (e.hasMoreElements()) {
      String key = (String) e.nextElement();
      if (url.endsWith(key)) return (String) mimeTbl.get(key);
    }
    return defaultMimeType;
  }

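  /**
   * Evict non-pending entries chosen uniformly at random until the cache
   * falls below maxCacheSize kilobytes. Called from handleCacheMiss while
   * the pageTbl lock is held, so enumerating pageTbl here is safe. Note
   * that the loop assumes at least one non-pending entry exists to evict.
   */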
  private void rejectCacheEntry() {
    while (HaboobStats.cacheSizeBytes > maxCacheSize * 1024) {

      // Pick a victim index uniformly at random; Random.nextInt(int)
      // always returns a non-negative value below pageTbl.size()
      int victim = rand.nextInt(pageTbl.size());
      Enumeration e = pageTbl.keys();
      for (int i = 0; i < victim; i++) {
        Object obj = e.nextElement();
        if (obj == null)
          throw new Error("ERROR: Got null entry searching for victim "
              + victim + " in cache, pageTbl.size " + pageTbl.size());
      }
      String url = (String) e.nextElement();
      cacheEntry entry = (cacheEntry) pageTbl.get(url);
      if (entry == null) {
        throw new Error("ERROR: rejectCacheEntry got null entry for url " + url);
      }
      // Don't reject pending entries
      if (entry.pending) continue;
      pageTbl.remove(url);
      HaboobStats.cacheSizeBytes -= entry.size;
      HaboobStats.cacheSizeEntries--;
      if (DEBUG)
        System.err.println("Rejecting cache entry " + url
            + " (" + entry.size + " bytes)");
      if (DEBUG)
        System.err.println("  Cache size now "
            + (HaboobStats.cacheSizeBytes / 1024) + " KB");
    }
  }

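  /**
   * A single cached page. While pending is true the file read has not
   * completed and interested requests queue on the waiting list; done()
   * clears the flag, updates the cache-size statistics, and responds to
   * every waiter. Transitions are guarded by the entry's own lock.
   */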
  private class cacheEntry {
    httpOKResponse response;
    int length;
    boolean pending;
    int size;
    ssLinkedList waiting;
    String url;

    // Note: the AFile argument is not stored; the in-flight read is
    // tracked through aFileTbl instead
    cacheEntry(httpRequest req, httpOKResponse resp, AFile af, int size) {
      this.response = resp;
      this.length = resp.getPayload().size;
      this.url = req.getURL();
      this.size = size;
      pending = true;
      waiting = new ssLinkedList();
      addWaiter(req);
    }

    synchronized void addWaiter(httpRequest req) {
      waiting.add_to_tail(req);
    }

    // Send response to all waiters when done reading
    synchronized void done() {
      if (DEBUG) System.err.println("PageCache: Done with file read");
      pending = false;
      HaboobStats.cacheSizeEntries++;
      HaboobStats.cacheSizeBytes += length;
      httpRequest waiter;

      while ((waiter = (httpRequest) waiting.remove_head()) != null) {
        httpResponder respd = new httpResponder(response, waiter);
        sendSink.enqueue_lossy(respd);
      }
    }

    // Send cache entry on hit
    void send(httpRequest req) {
      httpResponder respd = new httpResponder(response, req);
      sendSink.enqueue_lossy(respd);
    }

  }

}