0001: // plasmaHTCache.java
0002: // -----------------------
0003: // part of YaCy
0004: // (C) by Michael Peter Christen; mc@anomic.de
0005: // first published on http://www.anomic.de
0006: // Frankfurt, Germany, 2004
0007: //
0008: // $LastChangedDate: 2008-02-03 11:21:50 +0000 (So, 03 Feb 2008) $
0009: // $LastChangedRevision: 4433 $
0010: // $LastChangedBy: orbiter $
0011: //
0012: // This program is free software; you can redistribute it and/or modify
0013: // it under the terms of the GNU General Public License as published by
0014: // the Free Software Foundation; either version 2 of the License, or
0015: // (at your option) any later version.
0016: //
0017: // This program is distributed in the hope that it will be useful,
0018: // but WITHOUT ANY WARRANTY; without even the implied warranty of
0019: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0020: // GNU General Public License for more details.
0021: //
0022: // You should have received a copy of the GNU General Public License
0023: // along with this program; if not, write to the Free Software
0024: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0025: //
0026: // Using this software in any meaning (reading, learning, copying, compiling,
0027: // running) means that you agree that the Author(s) is (are) not responsible
0028: // for cost, loss of data or any harm that may be caused directly or indirectly
0029: // by usage of this softare or this documentation. The usage of this software
0030: // is on your own risk. The installation and usage (starting/running) of this
0031: // software may allow other people or application to access your computer and
0032: // any attached devices and is highly dependent on the configuration of the
0033: // software which must be done by the user of the software; the author(s) is
0034: // (are) also not responsible for proper configuration and usage of the
0035: // software, even if provoked by documentation provided together with
0036: // the software.
0037: //
0038: // Any changes to this file according to the GPL as documented in the file
0039: // gpl.txt aside this file in the shipment you received can be done to the
0040: // lines that follows this copyright notice here, but changes must not be
0041: // done inside the copyright notive above. A re-distribution must contain
0042: // the intact and unchanged copyright notice.
0043: // Contributions and changes to the program code must be marked as such.
0044:
0045: /*
0046: Class documentation:
0047: This class has two purposes:
1. provide an object that carries path and header information
0049: that shall be used as objects within a scheduler's stack
0050: 2. static methods for a cache control and cache aging
0051: the class shall also be used to do a cache-cleaning and index creation
0052: */
0053:
0054: package de.anomic.plasma;
0055:
0056: import java.io.BufferedInputStream;
0057: import java.io.File;
0058: import java.io.FileInputStream;
0059: import java.io.FileNotFoundException;
0060: import java.io.IOException;
0061: import java.io.InputStream;
0062: import java.lang.StringBuffer;
0063: import java.net.InetAddress;
0064: import java.util.Collections;
0065: import java.util.Date;
0066: import java.util.HashMap;
0067: import java.util.HashSet;
0068: import java.util.Iterator;
0069: import java.util.LinkedList;
0070: import java.util.Map;
0071: import java.util.TreeMap;
0072: import java.util.regex.Matcher;
0073: import java.util.regex.Pattern;
0074:
0075: import de.anomic.http.httpHeader;
0076: import de.anomic.kelondro.kelondroBase64Order;
0077: import de.anomic.kelondro.kelondroDyn;
0078: import de.anomic.kelondro.kelondroMScoreCluster;
0079: import de.anomic.kelondro.kelondroMapObjects;
0080: import de.anomic.plasma.cache.IResourceInfo;
0081: import de.anomic.plasma.cache.ResourceInfoFactory;
0082: import de.anomic.plasma.cache.UnsupportedProtocolException;
0083: import de.anomic.server.serverCodings;
0084: import de.anomic.server.serverDomains;
0085: import de.anomic.server.serverFileUtils;
0086: import de.anomic.server.serverInstantThread;
0087: import de.anomic.server.serverSystem;
0088: import de.anomic.server.serverThread;
0089: import de.anomic.server.logging.serverLog;
0090: import de.anomic.tools.enumerateFiles;
0091: import de.anomic.yacy.yacySeed;
0092: import de.anomic.yacy.yacySeedDB;
0093: import de.anomic.yacy.yacyURL;
0094:
0095: public final class plasmaHTCache {
0096:
// file name of the response header database, resolved relative to cachePath
public static final String DB_NAME = "responseHeader2.db";

private static final int stackLimit = 150; // if we exceed that limit, we do not check idle
public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day

// header database; opened by openResponseHeaderDB(), closed by close()
private static kelondroMapObjects responseHeaderDB = null;
// entries queued via push() and taken via pop(); guarded by synchronized (cacheStack)
private static final LinkedList<Entry> cacheStack = new LinkedList<Entry>();
private static final Map<String, File> cacheAge = Collections
.synchronizedMap(new TreeMap<String, File>()); // a <date+hash, cache-path> - relation
// running total of the byte sizes of all files accounted to the cache
public static long curCacheSize = 0;
// size limit in bytes; cleanup() only runs when this is > 0 and reached
public static long maxCacheSize;
// root directory of the cache
public static File cachePath;
public static final serverLog log = new serverLog("HTCACHE");
// files contained in this set are skipped by deleteFile() and cleanupDoIt()
public static final HashSet<File> filesInUse = new HashSet<File>(); // can we delete this file
// storage layout evaluated by getCachePath(): "tree" or "hash"
public static String cacheLayout;
// if true, getCachePath() moves objects found at other layout positions to the current one
public static boolean cacheMigration;

private static ResourceInfoFactory objFactory = new ResourceInfoFactory();
// one-time job started by init() that runs cacheScan()
private static serverThread cacheScanThread;
0116:
// doctypes: one-character codes returned by the docType() methods
public static final char DT_PDFPS = 'p';
public static final char DT_TEXT = 't';
public static final char DT_HTML = 'h';
public static final char DT_DOC = 'd';
public static final char DT_IMAGE = 'i';
public static final char DT_MOVIE = 'm';
public static final char DT_FLASH = 'f';
public static final char DT_SHARE = 's';
public static final char DT_AUDIO = 'a';
public static final char DT_BINARY = 'b';
public static final char DT_UNKNOWN = 'u';

// appearance locations: (used for flags)
public static final int AP_TITLE = 0; // title tag from html header
public static final int AP_H1 = 1; // headline - top level
public static final int AP_H2 = 2; // headline, second level
public static final int AP_H3 = 3; // headline, 3rd level
public static final int AP_H4 = 4; // headline, 4th level
public static final int AP_H5 = 5; // headline, 5th level
public static final int AP_H6 = 6; // headline, 6th level
public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam)
public static final int AP_DOM = 8; // word inside an url: in Domain
public static final int AP_PATH = 9; // word inside an url: in path
public static final int AP_IMG = 10; // tag inside image references
public static final int AP_ANCHOR = 11; // anchor description
public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance)
public static final int AP_BOLD = 13; // may be interpreted as emphasized
public static final int AP_ITALICS = 14; // may be interpreted as emphasized
public static final int AP_WEAK = 15; // for text that is small or barely visible
public static final int AP_INVISIBLE = 16; // good for spam detection
public static final int AP_TAG = 17; // for tagged indexing (i.e. using mp3 tags)
public static final int AP_AUTHOR = 18; // word appears in author name
public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags)
public static final int AP_TRACK = 20; // word appears in track name (i.e. in mp3 tags)

// URL attributes
public static final int UA_LOCAL = 0; // URL was crawled locally
public static final int UA_TILDE = 1; // tilde appears in URL
public static final int UA_REDIRECT = 2; // The URL is a redirection

// local flag attributes
public static final char LT_LOCAL = 'L';
public static final char LT_GLOBAL = 'G';
0161:
0162: // doctype calculation
0163: public static char docType(yacyURL url) {
0164: String path = url.getPath().toLowerCase();
0165: // serverLog.logFinest("PLASMA", "docType URL=" + path);
0166: char doctype = DT_UNKNOWN;
0167: if (path.endsWith(".gif")) {
0168: doctype = DT_IMAGE;
0169: } else if (path.endsWith(".ico")) {
0170: doctype = DT_IMAGE;
0171: } else if (path.endsWith(".bmp")) {
0172: doctype = DT_IMAGE;
0173: } else if (path.endsWith(".jpg")) {
0174: doctype = DT_IMAGE;
0175: } else if (path.endsWith(".jpeg")) {
0176: doctype = DT_IMAGE;
0177: } else if (path.endsWith(".png")) {
0178: doctype = DT_IMAGE;
0179: } else if (path.endsWith(".html")) {
0180: doctype = DT_HTML;
0181: } else if (path.endsWith(".txt")) {
0182: doctype = DT_TEXT;
0183: } else if (path.endsWith(".doc")) {
0184: doctype = DT_DOC;
0185: } else if (path.endsWith(".rtf")) {
0186: doctype = DT_DOC;
0187: } else if (path.endsWith(".pdf")) {
0188: doctype = DT_PDFPS;
0189: } else if (path.endsWith(".ps")) {
0190: doctype = DT_PDFPS;
0191: } else if (path.endsWith(".avi")) {
0192: doctype = DT_MOVIE;
0193: } else if (path.endsWith(".mov")) {
0194: doctype = DT_MOVIE;
0195: } else if (path.endsWith(".qt")) {
0196: doctype = DT_MOVIE;
0197: } else if (path.endsWith(".mpg")) {
0198: doctype = DT_MOVIE;
0199: } else if (path.endsWith(".md5")) {
0200: doctype = DT_SHARE;
0201: } else if (path.endsWith(".mpeg")) {
0202: doctype = DT_MOVIE;
0203: } else if (path.endsWith(".asf")) {
0204: doctype = DT_FLASH;
0205: }
0206: return doctype;
0207: }
0208:
0209: public static char docType(String mime) {
0210: // serverLog.logFinest("PLASMA", "docType mime=" + mime);
0211: char doctype = DT_UNKNOWN;
0212: if (mime == null)
0213: doctype = DT_UNKNOWN;
0214: else if (mime.startsWith("image/"))
0215: doctype = DT_IMAGE;
0216: else if (mime.endsWith("/gif"))
0217: doctype = DT_IMAGE;
0218: else if (mime.endsWith("/jpeg"))
0219: doctype = DT_IMAGE;
0220: else if (mime.endsWith("/png"))
0221: doctype = DT_IMAGE;
0222: else if (mime.endsWith("/html"))
0223: doctype = DT_HTML;
0224: else if (mime.endsWith("/rtf"))
0225: doctype = DT_DOC;
0226: else if (mime.endsWith("/pdf"))
0227: doctype = DT_PDFPS;
0228: else if (mime.endsWith("/octet-stream"))
0229: doctype = DT_BINARY;
0230: else if (mime.endsWith("/x-shockwave-flash"))
0231: doctype = DT_FLASH;
0232: else if (mime.endsWith("/msword"))
0233: doctype = DT_DOC;
0234: else if (mime.endsWith("/mspowerpoint"))
0235: doctype = DT_DOC;
0236: else if (mime.endsWith("/postscript"))
0237: doctype = DT_PDFPS;
0238: else if (mime.startsWith("text/"))
0239: doctype = DT_TEXT;
0240: else if (mime.startsWith("image/"))
0241: doctype = DT_IMAGE;
0242: else if (mime.startsWith("audio/"))
0243: doctype = DT_AUDIO;
0244: else if (mime.startsWith("video/"))
0245: doctype = DT_MOVIE;
0246: //bz2 = application/x-bzip2
0247: //dvi = application/x-dvi
0248: //gz = application/gzip
0249: //hqx = application/mac-binhex40
0250: //lha = application/x-lzh
0251: //lzh = application/x-lzh
0252: //pac = application/x-ns-proxy-autoconfig
0253: //php = application/x-httpd-php
0254: //phtml = application/x-httpd-php
0255: //rss = application/xml
0256: //tar = application/tar
0257: //tex = application/x-tex
0258: //tgz = application/tar
0259: //torrent = application/x-bittorrent
0260: //xhtml = application/xhtml+xml
0261: //xla = application/msexcel
0262: //xls = application/msexcel
0263: //xsl = application/xml
0264: //xml = application/xml
0265: //Z = application/x-compress
0266: //zip = application/zip
0267: return doctype;
0268: }
0269:
0270: public static void init(File htCachePath, long CacheSizeMax,
0271: long preloadTime, String layout, boolean migration) {
0272:
0273: cachePath = htCachePath;
0274: cacheLayout = layout;
0275: cacheMigration = migration;
0276: maxCacheSize = CacheSizeMax;
0277:
0278: // reset old HTCache ?
0279: String[] list = cachePath.list();
0280: if (list != null) {
0281: File object;
0282: for (int i = list.length - 1; i >= 0; i--) {
0283: object = new File(cachePath, list[i]);
0284:
0285: if (!object.isDirectory()) {
0286: continue;
0287: }
0288:
0289: if (!object.getName().equals("http")
0290: && !object.getName().equals("yacy")
0291: && !object.getName().equals("https")
0292: && !object.getName().equals("ftp")) {
0293: deleteOldHTCache(cachePath);
0294: break;
0295:
0296: }
0297: }
0298: }
0299: File testpath = new File(cachePath, "/http/");
0300: list = testpath.list();
0301: if (list != null) {
0302: File object;
0303: for (int i = list.length - 1; i >= 0; i--) {
0304: object = new File(testpath, list[i]);
0305:
0306: if (!object.isDirectory()) {
0307: continue;
0308: }
0309:
0310: if (!object.getName().equals("ip")
0311: && !object.getName().equals("other")
0312: && !object.getName().equals("www")) {
0313: deleteOldHTCache(cachePath);
0314: break;
0315: }
0316: }
0317: }
0318: testpath = null;
0319:
0320: // set/make cache path
0321: if (!htCachePath.exists()) {
0322: htCachePath.mkdirs();
0323: }
0324: if (!htCachePath.isDirectory()) {
0325: // if the cache does not exists or is a file and not a directory, panic
0326: log
0327: .logSevere("the cache path "
0328: + htCachePath.toString()
0329: + " is not a directory or does not exists and cannot be created");
0330: System.exit(0);
0331: }
0332:
0333: // open the response header database
0334: openResponseHeaderDB(preloadTime);
0335:
0336: // start the cache startup thread
0337: // this will collect information about the current cache size and elements
0338: try {
0339: cacheScanThread = serverInstantThread.oneTimeJob(Class
0340: .forName("de.anomic.plasma.plasmaHTCache"),
0341: "cacheScan", log, 120000);
0342: } catch (ClassNotFoundException e) {
0343: e.printStackTrace();
0344: }
0345: }
0346:
0347: private static void resetResponseHeaderDB() {
0348: if (responseHeaderDB != null)
0349: responseHeaderDB.close();
0350: File dbfile = new File(cachePath, DB_NAME);
0351: if (dbfile.exists())
0352: dbfile.delete();
0353: openResponseHeaderDB(0);
0354: }
0355:
0356: private static void openResponseHeaderDB(long preloadTime) {
0357: // open the response header database
0358: File dbfile = new File(cachePath, DB_NAME);
0359: responseHeaderDB = new kelondroMapObjects(new kelondroDyn(
0360: dbfile, true, true, preloadTime,
0361: yacySeedDB.commonHashLength, 150, '#',
0362: kelondroBase64Order.enhancedCoder, false, false, true),
0363: 500);
0364: }
0365:
0366: private static void deleteOldHTCache(File directory) {
0367: String[] list = directory.list();
0368: if (list != null) {
0369: File object;
0370: for (int i = list.length - 1; i >= 0; i--) {
0371: object = new File(directory, list[i]);
0372: if (object.isFile()) {
0373: object.delete();
0374: } else {
0375: deleteOldHTCache(object);
0376: }
0377: }
0378: }
0379: directory.delete();
0380: }
0381:
0382: public static int size() {
0383: synchronized (cacheStack) {
0384: return cacheStack.size();
0385: }
0386: }
0387:
0388: public static int dbSize() {
0389: return responseHeaderDB.size();
0390: }
0391:
0392: public static void push(Entry entry) {
0393: synchronized (cacheStack) {
0394: cacheStack.add(entry);
0395: }
0396: }
0397:
0398: public static Entry pop() {
0399: synchronized (cacheStack) {
0400: if (cacheStack.size() > 0)
0401: return cacheStack.removeFirst();
0402: return null;
0403: }
0404: }
0405:
0406: /**
0407: * This method changes the HTCache size.<br>
0408: * @param the new cache size in bytes
0409: */
0410: public static void setCacheSize(long newCacheSize) {
0411: maxCacheSize = newCacheSize;
0412: }
0413:
0414: /**
0415: * This method returns the free HTCache size.<br>
0416: * @return the cache size in bytes
0417: */
0418: public static long getFreeSize() {
0419: return (curCacheSize >= maxCacheSize) ? 0 : maxCacheSize
0420: - curCacheSize;
0421: }
0422:
0423: public static boolean writeResourceContent(yacyURL url, byte[] array) {
0424: if (array == null)
0425: return false;
0426: File file = getCachePath(url);
0427: try {
0428: deleteFile(file);
0429: file.getParentFile().mkdirs();
0430: serverFileUtils.write(array, file);
0431: } catch (FileNotFoundException e) {
0432: // this is the case of a "(Not a directory)" error, which should be prohibited
0433: // by the shallStoreCache() property. However, sometimes the error still occurs
0434: // In this case do nothing.
0435: log.logSevere("File storage failed (not a directory): "
0436: + e.getMessage());
0437: return false;
0438: } catch (IOException e) {
0439: log.logSevere("File storage failed (IO error): "
0440: + e.getMessage());
0441: return false;
0442: }
0443: writeFileAnnouncement(file);
0444: return true;
0445: }
0446:
0447: private static long lastcleanup = System.currentTimeMillis();
0448:
0449: public static void writeFileAnnouncement(File file) {
0450: synchronized (cacheAge) {
0451: if (file.exists()) {
0452: curCacheSize += file.length();
0453: if (System.currentTimeMillis() - lastcleanup > 300000) {
0454: // call the cleanup job only every 5 minutes
0455: cleanup();
0456: lastcleanup = System.currentTimeMillis();
0457: }
0458: cacheAge
0459: .put(ageString(file.lastModified(), file), file);
0460: }
0461: }
0462: }
0463:
0464: public static boolean deleteURLfromCache(yacyURL url) {
0465: if (deleteFileandDirs(getCachePath(url), "FROM")) {
0466: try {
0467: // As the file is gone, the entry in responseHeader.db is not needed anymore
0468: log
0469: .logFinest("Trying to remove responseHeader from URL: "
0470: + url.toNormalform(false, true));
0471: responseHeaderDB.remove(url.hash());
0472: } catch (IOException e) {
0473: resetResponseHeaderDB();
0474: log.logInfo(
0475: "IOExeption removing response header from DB: "
0476: + e.getMessage(), e);
0477: }
0478: return true;
0479: }
0480: return false;
0481: }
0482:
0483: private static boolean deleteFile(File obj) {
0484: if (obj.exists() && !filesInUse.contains(obj)) {
0485: long size = obj.length();
0486: if (obj.delete()) {
0487: curCacheSize -= size;
0488: return true;
0489: }
0490: }
0491: return false;
0492: }
0493:
0494: private static boolean deleteFileandDirs(File obj, String msg) {
0495: if (deleteFile(obj)) {
0496: log.logInfo("DELETED " + msg + " CACHE: " + obj.toString());
0497: obj = obj.getParentFile();
0498: // If the has been emptied, remove it
0499: // Loop as long as we produce empty driectoriers, but stop at HTCACHE
0500: while ((!(obj.equals(cachePath))) && (obj.isDirectory())
0501: && (obj.list().length == 0)) {
0502: if (obj.delete())
0503: log.logFine("DELETED EMPTY DIRECTORY : "
0504: + obj.toString());
0505: obj = obj.getParentFile();
0506: }
0507: return true;
0508: }
0509: return false;
0510: }
0511:
0512: private static void cleanupDoIt(long newCacheSize) {
0513: File file;
0514: synchronized (cacheAge) {
0515: Iterator<Map.Entry<String, File>> iter = cacheAge
0516: .entrySet().iterator();
0517: Map.Entry<String, File> entry;
0518: while (iter.hasNext() && curCacheSize >= newCacheSize) {
0519: if (Thread.currentThread().isInterrupted())
0520: return;
0521: entry = iter.next();
0522: String key = entry.getKey();
0523: file = entry.getValue();
0524: long t = Long.parseLong(key.substring(0, 16), 16);
0525: if (System.currentTimeMillis() - t < 300000)
0526: break; // files must have been at least 5 minutes in the cache before they are deleted
0527: if (file != null) {
0528: if (filesInUse.contains(file))
0529: continue;
0530: log.logFinest("Trying to delete [" + key
0531: + "] = old file: " + file.toString());
0532: // This needs to be called *before* the file is deleted
0533: String urlHash = getHash(file);
0534: if (deleteFileandDirs(file, "OLD")) {
0535: try {
0536: // As the file is gone, the entry in responseHeader.db is not needed anymore
0537: if (urlHash != null) {
0538: log
0539: .logFinest("Trying to remove responseHeader for URLhash: "
0540: + urlHash);
0541: responseHeaderDB.remove(urlHash);
0542: } else {
0543: yacyURL url = getURL(file);
0544: if (url != null) {
0545: log
0546: .logFinest("Trying to remove responseHeader for URL: "
0547: + url
0548: .toNormalform(
0549: false,
0550: true));
0551: responseHeaderDB.remove(url.hash());
0552: }
0553: }
0554: } catch (IOException e) {
0555: log.logInfo(
0556: "IOExeption removing response header from DB: "
0557: + e.getMessage(), e);
0558: }
0559: }
0560: }
0561: iter.remove();
0562: }
0563: }
0564: }
0565:
0566: private static void cleanup() {
0567: // clean up cache to have 4% (enough) space for next entries
0568: if (cacheAge.size() > 0 && curCacheSize >= maxCacheSize
0569: && maxCacheSize > 0) {
0570: cleanupDoIt(maxCacheSize - (maxCacheSize / 100) * 4);
0571: }
0572: }
0573:
0574: public static void close() {
0575: // closing cache scan if still running
0576: if ((cacheScanThread != null) && (cacheScanThread.isAlive())) {
0577: cacheScanThread.terminate(true);
0578: }
0579:
0580: // closing DB
0581: responseHeaderDB.close();
0582: }
0583:
0584: private static String ageString(long date, File f) {
0585: StringBuffer sb = new StringBuffer(32);
0586: String s = Long.toHexString(date);
0587: for (int i = s.length(); i < 16; i++)
0588: sb.append('0');
0589: sb.append(s);
0590: s = Integer.toHexString(f.hashCode());
0591: for (int i = s.length(); i < 8; i++)
0592: sb.append('0');
0593: sb.append(s);
0594: return sb.toString();
0595: }
0596:
0597: public static void cacheScan() {
0598: log.logConfig("STARTING HTCACHE SCANNING");
0599: kelondroMScoreCluster<String> doms = new kelondroMScoreCluster<String>();
0600: int fileCount = 0;
0601: enumerateFiles fileEnum = new enumerateFiles(cachePath, true,
0602: false, true, true);
0603: File dbfile = new File(cachePath, "responseHeader.db");
0604: while (fileEnum.hasMoreElements()) {
0605: if (Thread.currentThread().isInterrupted())
0606: return;
0607: fileCount++;
0608: File nextFile = (File) fileEnum.nextElement();
0609: long nextFileModDate = nextFile.lastModified();
0610: //System.out.println("Cache: " + dom(f));
0611: doms.incScore(dom(nextFile));
0612: curCacheSize += nextFile.length();
0613: if (!dbfile.equals(nextFile))
0614: cacheAge.put(ageString(nextFileModDate, nextFile),
0615: nextFile);
0616: try {
0617: Thread.sleep(10);
0618: } catch (InterruptedException e) {
0619: return;
0620: }
0621: }
0622: //System.out.println("%" + (String) cacheAge.firstKey() + "=" + cacheAge.get(cacheAge.firstKey()));
0623: long ageHours = 0;
0624: if (!cacheAge.isEmpty()) {
0625: Iterator<String> i = cacheAge.keySet().iterator();
0626: if (i.hasNext())
0627: try {
0628: ageHours = (System.currentTimeMillis() - Long
0629: .parseLong(i.next().substring(0, 16), 16)) / 3600000;
0630: } catch (NumberFormatException e) {
0631: ageHours = 0;
0632: }
0633: else {
0634: ageHours = 0;
0635: }
0636: }
0637: log.logConfig("CACHE SCANNED, CONTAINS "
0638: + fileCount
0639: + " FILES = "
0640: + curCacheSize
0641: / 1048576
0642: + "MB, OLDEST IS "
0643: + ((ageHours < 24) ? (ageHours + " HOURS")
0644: : ((ageHours / 24) + " DAYS")) + " OLD");
0645: cleanup();
0646:
0647: log.logConfig("STARTING DNS PREFETCH");
0648: // start to prefetch IPs from DNS
0649: String dom;
0650: long start = System.currentTimeMillis();
0651: String result = "";
0652: fileCount = 0;
0653: while ((doms.size() > 0) && (fileCount < 50)
0654: && ((System.currentTimeMillis() - start) < 60000)) {
0655: if (Thread.currentThread().isInterrupted())
0656: return;
0657: dom = (String) doms.getMaxObject();
0658: InetAddress ip = serverDomains.dnsResolve(dom);
0659: if (ip == null)
0660: continue;
0661: result += ", " + dom + "=" + ip.getHostAddress();
0662: log.logConfig("PRE-FILLED " + dom + "="
0663: + ip.getHostAddress());
0664: fileCount++;
0665: doms.deleteScore(dom);
0666: // wait a short while to prevent that this looks like a DoS
0667: try {
0668: Thread.sleep(100);
0669: } catch (InterruptedException e) {
0670: return;
0671: }
0672: }
0673: if (result.length() > 2)
0674: log.logConfig("PRE-FILLED DNS CACHE, FETCHED " + fileCount
0675: + " ADDRESSES: " + result.substring(2));
0676: }
0677:
0678: private static String dom(File f) {
0679: String s = f.toString().substring(
0680: cachePath.toString().length() + 1);
0681: int p = s.indexOf("/");
0682: if (p < 0)
0683: p = s.indexOf("\\");
0684: if (p < 0)
0685: return null;
0686: // remove the protokoll
0687: s = s.substring(p + 1);
0688: p = s.indexOf("/");
0689: if (p < 0)
0690: p = s.indexOf("\\");
0691: if (p < 0)
0692: return null;
0693: String prefix = new String("");
0694: if (s.startsWith("www"))
0695: prefix = new String("www.");
0696: // remove the www|other|ip directory
0697: s = s.substring(p + 1);
0698: p = s.indexOf("/");
0699: if (p < 0)
0700: p = s.indexOf("\\");
0701: if (p < 0)
0702: return null;
0703: int e = s.indexOf("!");
0704: if ((e > 0) && (e < p))
0705: p = e; // strip port
0706: return prefix + s.substring(0, p);
0707: }
0708:
0709: /**
0710: * Returns an object containing metadata about a cached resource
0711: * @param url the {@link URL} of the resource
0712: * @return an {@link IResourceInfo info object}
0713: * @throws <b>IllegalAccessException</b> if the {@link SecurityManager} doesn't allow instantiation
0714: * of the info object with the given protocol
0715: * @throws <b>UnsupportedProtocolException</b> if the protocol is not supported and therefore the
0716: * info object couldn't be created
0717: */
0718: public static IResourceInfo loadResourceInfo(yacyURL url)
0719: throws UnsupportedProtocolException, IllegalAccessException {
0720:
0721: // loading data from database
0722: Map<String, String> hdb = responseHeaderDB.getMap(url.hash());
0723: if (hdb == null)
0724: return null;
0725:
0726: // generate the cached object
0727: IResourceInfo cachedObj = objFactory.buildResourceInfoObj(url,
0728: hdb);
0729: return cachedObj;
0730: }
0731:
0732: public static ResourceInfoFactory getResourceInfoFactory() {
0733: return objFactory;
0734: }
0735:
0736: public static boolean full() {
0737: return (cacheStack.size() > stackLimit);
0738: }
0739:
0740: public static boolean empty() {
0741: return (cacheStack.size() == 0);
0742: }
0743:
0744: public static boolean isPicture(String mimeType) {
0745: if (mimeType == null)
0746: return false;
0747: return mimeType.toUpperCase().startsWith("IMAGE");
0748: }
0749:
0750: public static boolean isText(String mimeType) {
0751: // Object ct = response.get(httpHeader.CONTENT_TYPE);
0752: // if (ct == null) return false;
0753: // String t = ((String)ct).toLowerCase();
0754: // return ((t.startsWith("text")) || (t.equals("application/xhtml+xml")));
0755: return plasmaParser.supportedMimeTypesContains(mimeType);
0756: }
0757:
0758: public static boolean noIndexingURL(yacyURL url) {
0759: if (url == null)
0760: return false;
0761: String urlString = url.toString().toLowerCase();
0762:
0763: //http://www.yacy.net/getimage.php?image.png
0764:
0765: int idx = urlString.indexOf("?");
0766: if (idx > 0)
0767: urlString = urlString.substring(0, idx);
0768:
0769: //http://www.yacy.net/getimage.php
0770:
0771: idx = urlString.lastIndexOf(".");
0772: if (idx > 0)
0773: urlString = urlString.substring(idx + 1);
0774:
0775: //php
0776:
0777: return plasmaParser.mediaExtContains(urlString);
0778: }
0779:
0780: /**
0781: * This function moves an old cached object (if it exists) to the new position
0782: */
0783: private static void moveCachedObject(File oldpath, File newpath) {
0784: try {
0785: if (oldpath.exists() && oldpath.isFile()
0786: && (!newpath.exists())) {
0787: long d = oldpath.lastModified();
0788: newpath.getParentFile().mkdirs();
0789: if (oldpath.renameTo(newpath)) {
0790: cacheAge.put(ageString(d, newpath), newpath);
0791: File obj = oldpath.getParentFile();
0792: while ((!(obj.equals(cachePath)))
0793: && (obj.isDirectory())
0794: && (obj.list().length == 0)) {
0795: if (obj.delete())
0796: log.logFine("DELETED EMPTY DIRECTORY : "
0797: + obj.toString());
0798: obj = obj.getParentFile();
0799: }
0800: }
0801: }
0802: } catch (Exception e) {
0803: log.logFine("moveCachedObject('" + oldpath.toString()
0804: + "','" + newpath.toString() + "')", e);
0805: }
0806: }
0807:
0808: private static String replaceRegex(String input, String regex,
0809: String replacement) {
0810: if (input == null) {
0811: return "";
0812: }
0813: if (input.length() > 0) {
0814: final Pattern searchPattern = Pattern.compile(regex);
0815: final Matcher matcher = searchPattern.matcher(input);
0816: while (matcher.find()) {
0817: input = matcher.replaceAll(replacement);
0818: matcher.reset(input);
0819: }
0820: }
0821: return input;
0822: }
0823:
0824: /**
0825: * this method creates from a given host and path a cache path
0826: * from a given host (which may also be an IPv4 - number, but not IPv6 or
0827: * a domain; all without leading 'http://') and a path (which must start
0828: * with a leading '/', and may also end in an '/') a path to a file
0829: * in the file system with root as given in cachePath is constructed
0830: * it will also be ensured, that the complete path exists; if necessary
0831: * that path will be generated
0832: * @return new File
0833: */
0834: public static File getCachePath(final yacyURL url) {
0835: // this.log.logFinest("plasmaHTCache: getCachePath: IN=" + url.toString());
0836:
0837: // peer.yacy || www.peer.yacy = http/yacy/peer
0838: // protocol://www.doamin.net = protocol/www/domain.net
0839: // protocol://other.doamin.net = protocol/other/other.domain.net
0840: // protocol://xxx.xxx.xxx.xxx = protocol/ip/xxx.xxx.xxx.xxx
0841:
0842: String host = url.getHost().toLowerCase();
0843:
0844: String path = url.getPath();
0845: final String query = url.getQuery();
0846: if (!path.startsWith("/")) {
0847: path = "/" + path;
0848: }
0849: if (path.endsWith("/") && query == null) {
0850: path = path + "ndx";
0851: }
0852:
0853: // yes this is not reversible, but that is not needed
0854: path = replaceRegex(path, "/\\.\\./", "/!!/");
0855: path = replaceRegex(path, "(\"|\\\\|\\*|\\?|:|<|>|\\|+)", "_"); // hier wird kein '/' gefiltert
0856: String extention = null;
0857: int d = path.lastIndexOf(".");
0858: int s = path.lastIndexOf("/");
0859: if ((d >= 0) && (d > s)) {
0860: extention = path.substring(d);
0861: } else if (path.endsWith("/ndx")) {
0862: extention = new String(".html"); // Just a wild guess
0863: }
0864: path = path.concat(replaceRegex(query,
0865: "(\"|\\\\|\\*|\\?|/|:|<|>|\\|+)", "_"));
0866:
0867: // only set NO default ports
0868: int port = url.getPort();
0869: String protocol = url.getProtocol();
0870: if (port >= 0) {
0871: if ((port == 80 && protocol.equals("http"))
0872: || (port == 443 && protocol.equals("https"))
0873: || (port == 21 && protocol.equals("ftp"))) {
0874: port = -1;
0875: }
0876: }
0877: if (host.endsWith(".yacy")) {
0878: host = host.substring(0, host.length() - 5);
0879: if (host.startsWith("www.")) {
0880: host = host.substring(4);
0881: }
0882: protocol = "yacy";
0883: } else if (host.startsWith("www.")) {
0884: host = "www/" + host.substring(4);
0885: } else if (host
0886: .matches("\\d{2,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}")) {
0887: host = "ip/" + host;
0888: } else {
0889: host = "other/" + host;
0890: }
0891: StringBuffer fileName = new StringBuffer();
0892: fileName.append(protocol).append('/').append(host);
0893: if (port >= 0) {
0894: fileName.append('!').append(port);
0895: }
0896:
0897: // generate cache path according to storage method
0898: if (cacheLayout.equals("tree")) {
0899: File FileTree = treeFile(fileName, "tree", path);
0900: if (cacheMigration) {
0901: moveCachedObject(hashFile(fileName, "hash", extention,
0902: url.hash()), FileTree);
0903: moveCachedObject(hashFile(fileName, null, extention,
0904: url.hash()), FileTree); // temporary migration
0905: moveCachedObject(treeFile(fileName, null, path),
0906: FileTree); // temporary migration
0907: }
0908: return FileTree;
0909: }
0910: if (cacheLayout.equals("hash")) {
0911: File FileFlat = hashFile(fileName, "hash", extention, url
0912: .hash());
0913: if (cacheMigration) {
0914: moveCachedObject(treeFile(fileName, "tree", path),
0915: FileFlat);
0916: moveCachedObject(treeFile(fileName, null, path),
0917: FileFlat); // temporary migration
0918: moveCachedObject(hashFile(fileName, null, extention,
0919: url.hash()), FileFlat); // temporary migration
0920: }
0921: return FileFlat;
0922: }
0923: return null;
0924: }
0925:
0926: private static File treeFile(StringBuffer fileName, String prefix,
0927: String path) {
0928: StringBuffer f = new StringBuffer(fileName.length() + 30);
0929: f.append(fileName);
0930: if (prefix != null)
0931: f.append('/').append(prefix);
0932: f.append(path);
0933: return new File(cachePath, f.toString());
0934: }
0935:
0936: private static File hashFile(StringBuffer fileName, String prefix,
0937: String extention, String urlhash) {
0938: String hexHash = yacySeed.b64Hash2hexHash(urlhash);
0939: StringBuffer f = new StringBuffer(fileName.length() + 30);
0940: f.append(fileName);
0941: if (prefix != null)
0942: f.append('/').append(prefix);
0943: f.append('/').append(hexHash.substring(0, 2)).append('/')
0944: .append(hexHash.substring(2, 4)).append('/').append(
0945: hexHash);
0946: if (extention != null)
0947: fileName.append(extention);
0948: return new File(cachePath, f.toString());
0949: }
0950:
0951: /**
0952: * This is a helper function that extracts the Hash from the filename
0953: */
0954: public static String getHash(final File f) {
0955: if ((!f.isFile()) || (f.getPath().indexOf("hash") < 0))
0956: return null;
0957: String name = f.getName();
0958: if (name.length() < 18)
0959: return null; // not a hash file name
0960: String hexHash = name.substring(0, 18);
0961: if (hexHash.indexOf('.') >= 0)
0962: return null;
0963: try {
0964: String hash = kelondroBase64Order.enhancedCoder
0965: .encode(serverCodings.decodeHex(hexHash));
0966: if (hash.length() == yacySeedDB.commonHashLength)
0967: return hash;
0968: return null;
0969: } catch (Exception e) {
0970: //log.logWarning("getHash: " + e.getMessage(), e);
0971: return null;
0972: }
0973: }
0974:
0975: /**
0976: * this is the reverse function to getCachePath: it constructs the url as string
0977: * from a given storage path
0978: */
0979: public static yacyURL getURL(final File f) {
0980: // this.log.logFinest("plasmaHTCache: getURL: IN: Path=[" + cachePath + "] File=[" + f + "]");
0981: final String urlHash = getHash(f);
0982: if (urlHash != null) {
0983: yacyURL url = null;
0984: // try the urlPool
0985: try {
0986: url = plasmaSwitchboard.getSwitchboard()
0987: .getURL(urlHash);
0988: } catch (Exception e) {
0989: log
0990: .logWarning(
0991: "getURL(" + urlHash + "): " /*+ e.getMessage()*/,
0992: e);
0993: url = null;
0994: }
0995: if (url != null)
0996: return url;
0997: // try responseHeaderDB
0998: Map<String, String> hdb = responseHeaderDB.getMap(urlHash);
0999: if (hdb != null) {
1000: Object origRequestLine = hdb
1001: .get(httpHeader.X_YACY_ORIGINAL_REQUEST_LINE);
1002: if ((origRequestLine != null)
1003: && (origRequestLine instanceof String)) {
1004: int i = ((String) origRequestLine).indexOf(" ");
1005: if (i >= 0) {
1006: String s = ((String) origRequestLine)
1007: .substring(i).trim();
1008: i = s.indexOf(" ");
1009: try {
1010: url = new yacyURL((i < 0) ? s : s
1011: .substring(0, i), urlHash);
1012: } catch (final Exception e) {
1013: url = null;
1014: }
1015: }
1016: }
1017: }
1018: if (url != null)
1019: return url;
1020: }
1021: // If we can't get the correct URL, it seems to be a treeed file
1022: String c = cachePath.toString().replace('\\', '/');
1023: String path = f.toString().replace('\\', '/');
1024: int pos;
1025: if ((pos = path.indexOf("/tree")) >= 0)
1026: path = path.substring(0, pos) + path.substring(pos + 5);
1027:
1028: if (path.endsWith("ndx")) {
1029: path = path.substring(0, path.length() - 3);
1030: }
1031:
1032: if ((pos = path.lastIndexOf(c)) == 0) {
1033: path = path.substring(pos + c.length());
1034: while (path.startsWith("/")) {
1035: path = path.substring(1);
1036: }
1037:
1038: pos = path.indexOf("!");
1039: if (pos >= 0) {
1040: path = path.substring(0, pos) + ":"
1041: + path.substring(pos + 1);
1042: }
1043:
1044: String protocol = "http://";
1045: String host = "";
1046: if (path.startsWith("yacy/")) {
1047: path = path.substring(5);
1048:
1049: pos = path.indexOf("/");
1050: if (pos > 0) {
1051: host = path.substring(0, pos);
1052: path = path.substring(pos);
1053: } else {
1054: host = path;
1055: path = "";
1056: }
1057: pos = host.indexOf(":");
1058: if (pos > 0) {
1059: host = host.substring(0, pos) + ".yacy"
1060: + host.substring(pos);
1061: } else {
1062: host = host + ".yacy";
1063: }
1064:
1065: } else {
1066: if (path.startsWith("http/")) {
1067: path = path.substring(5);
1068: } else if (path.startsWith("https/")) {
1069: protocol = "https://";
1070: path = path.substring(6);
1071: } else if (path.startsWith("ftp/")) {
1072: protocol = "ftp://";
1073: path = path.substring(4);
1074: } else {
1075: return null;
1076: }
1077: if (path.startsWith("www/")) {
1078: path = path.substring(4);
1079: host = "www.";
1080: } else if (path.startsWith("other/")) {
1081: path = path.substring(6);
1082: } else if (path.startsWith("ip/")) {
1083: path = path.substring(3);
1084: }
1085: pos = path.indexOf("/");
1086: if (pos > 0) {
1087: host = host + path.substring(0, pos);
1088: path = path.substring(pos);
1089: } else {
1090: host = host + path;
1091: path = "";
1092: }
1093: }
1094:
1095: if (!path.equals("")) {
1096: final Pattern pathPattern = Pattern.compile("/!!/");
1097: final Matcher matcher = pathPattern.matcher(path);
1098: while (matcher.find()) {
1099: path = matcher.replaceAll("/\\.\\./");
1100: matcher.reset(path);
1101: }
1102: }
1103:
1104: // this.log.logFinest("plasmaHTCache: getURL: OUT=" + s);
1105: try {
1106: return new yacyURL(protocol + host + path, null);
1107: } catch (final Exception e) {
1108: return null;
1109: }
1110: }
1111: return null;
1112: }
1113:
1114: /**
1115: * Returns the content of a cached resource as {@link InputStream}
1116: * @param url the requested resource
1117: * @return the resource content as {@link InputStream}. In no data
1118: * is available or the cached file is not readable, <code>null</code>
1119: * is returned.
1120: */
1121: public static InputStream getResourceContentStream(yacyURL url) {
1122: // load the url as resource from the cache
1123: File f = getCachePath(url);
1124: if (f.exists() && f.canRead())
1125: try {
1126: return new BufferedInputStream(new FileInputStream(f));
1127: } catch (IOException e) {
1128: log.logSevere(
1129: "Unable to create a BufferedInputStream from file "
1130: + f, e);
1131: return null;
1132: }
1133: return null;
1134: }
1135:
1136: public static long getResourceContentLength(yacyURL url) {
1137: // load the url as resource from the cache
1138: File f = getCachePath(url);
1139: if (f.exists() && f.canRead()) {
1140: return f.length();
1141: }
1142: return 0;
1143: }
1144:
1145: public static Entry newEntry(Date initDate, int depth, yacyURL url,
1146: String name, String responseStatus, IResourceInfo docInfo,
1147: String initiator, plasmaCrawlProfile.entry profile) {
1148: return new Entry(initDate, depth, url, name, responseStatus,
1149: docInfo, initiator, profile);
1150: }
1151:
1152: public final static class Entry {
1153:
1154: // the class objects
1155: private Date initDate; // the date when the request happened; will be used as a key
1156: private int depth; // the depth of prefetching
1157: private String responseStatus;
1158: private File cacheFile; // the cache file
1159: private byte[] cacheArray; // or the cache as byte-array
1160: private yacyURL url;
1161: private String name; // the name of the link, read as anchor from an <a>-tag
1162: private Date lastModified;
1163: private char doctype;
1164: private String language;
1165: private plasmaCrawlProfile.entry profile;
1166: private String initiator;
1167:
1168: /**
1169: * protocolspecific information about the resource
1170: */
1171: private IResourceInfo resInfo;
1172:
1173: protected Object clone() throws CloneNotSupportedException {
1174: return new Entry(this .initDate, this .depth, this .url,
1175: this .name, this .responseStatus, this .resInfo,
1176: this .initiator, this .profile);
1177: }
1178:
1179: public Entry(Date initDate, int depth, yacyURL url,
1180: String name, String responseStatus,
1181: IResourceInfo resourceInfo, String initiator,
1182: plasmaCrawlProfile.entry profile) {
1183: if (resourceInfo == null) {
1184: System.out
1185: .println("Content information object is null. "
1186: + url);
1187: System.exit(0);
1188: }
1189: this .resInfo = resourceInfo;
1190: this .url = url;
1191: this .name = name;
1192: this .cacheFile = getCachePath(this .url);
1193:
1194: // assigned:
1195: this .initDate = initDate;
1196: this .depth = depth;
1197: this .responseStatus = responseStatus;
1198: this .profile = profile;
1199: this .initiator = (initiator == null) ? null : ((initiator
1200: .length() == 0) ? null : initiator);
1201:
1202: // getting the last modified date
1203: this .lastModified = resourceInfo.getModificationDate();
1204:
1205: // getting the doctype
1206: this .doctype = docType(resourceInfo.getMimeType());
1207: if (this .doctype == DT_UNKNOWN)
1208: this .doctype = docType(url);
1209: this .language = yacyURL.language(url);
1210:
1211: // to be defined later:
1212: this .cacheArray = null;
1213: }
1214:
1215: public String name() {
1216: // the anchor name; can be either the text inside the anchor tag or the page description after loading of the page
1217: return this .name;
1218: }
1219:
1220: public yacyURL url() {
1221: return this .url;
1222: }
1223:
1224: public String urlHash() {
1225: return this .url.hash();
1226: }
1227:
1228: public Date lastModified() {
1229: return this .lastModified;
1230: }
1231:
1232: public String language() {
1233: return this .language;
1234: }
1235:
1236: public plasmaCrawlProfile.entry profile() {
1237: return this .profile;
1238: }
1239:
1240: public String initiator() {
1241: return this .initiator;
1242: }
1243:
1244: public boolean proxy() {
1245: return initiator() == null;
1246: }
1247:
1248: public long size() {
1249: if (this .cacheArray == null)
1250: return 0;
1251: return this .cacheArray.length;
1252: }
1253:
1254: public int depth() {
1255: return this .depth;
1256: }
1257:
1258: public yacyURL referrerURL() {
1259: return (this .resInfo == null) ? null : this .resInfo
1260: .getRefererUrl();
1261: }
1262:
1263: public File cacheFile() {
1264: return this .cacheFile;
1265: }
1266:
1267: public void setCacheArray(byte[] data) {
1268: this .cacheArray = data;
1269: }
1270:
1271: public byte[] cacheArray() {
1272: return this .cacheArray;
1273: }
1274:
1275: public IResourceInfo getDocumentInfo() {
1276: return this .resInfo;
1277: }
1278:
1279: public boolean writeResourceInfo() {
1280: if (this .resInfo == null)
1281: return false;
1282: try {
1283: HashMap<String, String> hm = new HashMap<String, String>();
1284: hm.putAll(this .resInfo.getMap());
1285: responseHeaderDB.set(this .url.hash(), hm);
1286: } catch (Exception e) {
1287: resetResponseHeaderDB();
1288: return false;
1289: }
1290: return true;
1291: }
1292:
1293: public String getMimeType() {
1294: return (this .resInfo == null) ? null : this .resInfo
1295: .getMimeType();
1296: }
1297:
1298: public Date ifModifiedSince() {
1299: return (this .resInfo == null) ? null : this .resInfo
1300: .ifModifiedSince();
1301: }
1302:
1303: public boolean requestWithCookie() {
1304: return (this .resInfo == null) ? false : this .resInfo
1305: .requestWithCookie();
1306: }
1307:
1308: public boolean requestProhibitsIndexing() {
1309: return (this .resInfo == null) ? false : this .resInfo
1310: .requestProhibitsIndexing();
1311: }
1312:
1313: /*
1314: public boolean update() {
1315: return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD));
1316: }
1317: */
1318:
1319: // the following three methods for cache read/write granting shall be as loose as possible
1320: // but also as strict as necessary to enable caching of most items
1321: /**
1322: * @return NULL if the answer is TRUE, in case of FALSE, the reason as String is returned
1323: */
1324: public String shallStoreCacheForProxy() {
1325:
1326: // check profile (disabled: we will check this in the plasmaSwitchboard)
1327: //if (!this.profile.storeHTCache()) { return "storage_not_wanted"; }
1328:
1329: // decide upon header information if a specific file should be stored to the cache or not
1330: // if the storage was requested by prefetching, the request map is null
1331:
1332: // check status code
1333: if ((this .resInfo != null)
1334: && (!this .resInfo
1335: .validResponseStatus(this .responseStatus))) {
1336: return "bad_status_"
1337: + this .responseStatus.substring(0, 3);
1338: }
1339:
1340: // check storage location
1341: // sometimes a file name is equal to a path name in the same directory;
1342: // or sometimes a file name is equal a directory name created earlier;
1343: // we cannot match that here in the cache file path and therefore omit writing into the cache
1344: if (this .cacheFile.getParentFile().isFile()
1345: || this .cacheFile.isDirectory()) {
1346: return "path_ambiguous";
1347: }
1348: if (this .cacheFile.toString().indexOf("..") >= 0) {
1349: return "path_dangerous";
1350: }
1351: if (this .cacheFile.getAbsolutePath().length() > serverSystem.maxPathLength) {
1352: return "path too long";
1353: }
1354:
1355: // -CGI access in request
1356: // CGI access makes the page very individual, and therefore not usable in caches
1357: if (this .url.isPOST() && !this .profile.crawlingQ()) {
1358: return "dynamic_post";
1359: }
1360: if (this .url.isCGI()) {
1361: return "dynamic_cgi";
1362: }
1363:
1364: if (this .resInfo != null) {
1365: return this .resInfo.shallStoreCacheForProxy();
1366: }
1367:
1368: return null;
1369: }
1370:
1371: /**
1372: * decide upon header information if a specific file should be taken from the cache or not
1373: * @return whether the file should be taken from the cache
1374: */
1375: public boolean shallUseCacheForProxy() {
1376:
1377: // -CGI access in request
1378: // CGI access makes the page very individual, and therefore not usable in caches
1379: if (this .url.isPOST()) {
1380: return false;
1381: }
1382: if (this .url.isCGI()) {
1383: return false;
1384: }
1385:
1386: if (this .resInfo != null) {
1387: return this .resInfo.shallUseCacheForProxy();
1388: }
1389:
1390: return true;
1391: }
1392:
1393: } // class Entry
1394: }
|