0001: // httpdProxyHandler.java
0002: // -----------------------
0003: // part of YACY
0004: // (C) by Michael Peter Christen; mc@anomic.de
0005: // first published on http://www.anomic.de
0006: // Frankfurt, Germany, 2004
0007: //
0008: // $LastChangedDate: 2008-01-28 18:21:08 +0000 (Mo, 28 Jan 2008) $
0009: // $LastChangedRevision: 4411 $
0010: // $LastChangedBy: orbiter $
0011: //
0012: // This program is free software; you can redistribute it and/or modify
0013: // it under the terms of the GNU General Public License as published by
0014: // the Free Software Foundation; either version 2 of the License, or
0015: // (at your option) any later version.
0016: //
0017: // This program is distributed in the hope that it will be useful,
0018: // but WITHOUT ANY WARRANTY; without even the implied warranty of
0019: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0020: // GNU General Public License for more details.
0021: //
0022: // You should have received a copy of the GNU General Public License
0023: // along with this program; if not, write to the Free Software
0024: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0025: //
0026: // Using this software in any meaning (reading, learning, copying, compiling,
0027: // running) means that you agree that the Author(s) is (are) not responsible
0028: // for cost, loss of data or any harm that may be caused directly or indirectly
0029: // by usage of this softare or this documentation. The usage of this software
0030: // is on your own risk. The installation and usage (starting/running) of this
0031: // software may allow other people or application to access your computer and
0032: // any attached devices and is highly dependent on the configuration of the
0033: // software which must be done by the user of the software; the author(s) is
0034: // (are) also not responsible for proper configuration and usage of the
0035: // software, even if provoked by documentation provided together with
0036: // the software.
0037: //
0038: // Any changes to this file according to the GPL as documented in the file
0039: // gpl.txt aside this file in the shipment you received can be done to the
0040: // lines that follows this copyright notice here, but changes must not be
0041: // done inside the copyright notive above. A re-distribution must contain
0042: // the intact and unchanged copyright notice.
0043: // Contributions and changes to the program code must be marked as such.
0044:
0045: // Contributions:
0046: // [AS] Alexander Schier: Blacklist (404 response for AGIS hosts)
0047: // [TL] Timo Leise: url-wildcards for blacklists
0048:
0049: /*
0050: Class documentation:
0051: This class is a servlet to the httpd daemon. It is accessed each time
0052: an URL in a GET, HEAD or POST command contains the whole host information
0053: or a host is given in the header host field of an HTTP/1.0 / HTTP/1.1
0054: command.
0055: Transparency is maintained whenever appropriate. We change header
0056: attributes if necessary for the indexing mechanism; i.e. we do not
0057: support gzip-ed encoding. We also do not support unrealistic
0058: 'expires' values that would force a cache to be flushed immediately.
0059: Pragma no-cache attributes are supported.
0060: */
0061:
0062: package de.anomic.http;
0063:
0064: import java.io.BufferedReader;
0065: import java.io.File;
0066: import java.io.IOException;
0067: import java.io.InputStream;
0068: import java.io.InputStreamReader;
0069: import java.io.OutputStream;
0070: import java.io.PrintWriter;
0071: import java.io.PushbackInputStream;
0072: import java.io.Writer;
0073: import java.net.BindException;
0074: import java.net.ConnectException;
0075: import java.net.InetAddress;
0076: import java.net.MalformedURLException;
0077: import java.net.NoRouteToHostException;
0078: import java.net.Socket;
0079: import java.net.SocketTimeoutException;
0080: import java.net.UnknownHostException;
0081: import java.util.Arrays;
0082: import java.util.Date;
0083: import java.util.HashSet;
0084: import java.util.Iterator;
0085: import java.util.Properties;
0086: import java.util.logging.FileHandler;
0087: import java.util.logging.Level;
0088: import java.util.logging.LogManager;
0089: import java.util.logging.Logger;
0090: import java.util.zip.GZIPOutputStream;
0091:
0092: import de.anomic.htmlFilter.htmlFilterContentTransformer;
0093: import de.anomic.htmlFilter.htmlFilterTransformer;
0094: import de.anomic.htmlFilter.htmlFilterWriter;
0095: import de.anomic.plasma.plasmaHTCache;
0096: import de.anomic.plasma.plasmaParser;
0097: import de.anomic.plasma.plasmaSwitchboard;
0098: import de.anomic.plasma.cache.IResourceInfo;
0099: import de.anomic.plasma.cache.http.ResourceInfo;
0100: import de.anomic.plasma.urlPattern.plasmaURLPattern;
0101: import de.anomic.server.serverCore;
0102: import de.anomic.server.serverDomains;
0103: import de.anomic.server.serverFileUtils;
0104: import de.anomic.server.serverObjects;
0105: import de.anomic.server.logging.serverLog;
0106: import de.anomic.server.logging.serverMiniLogFormatter;
0107: import de.anomic.yacy.yacyCore;
0108: import de.anomic.yacy.yacyURL;
0109:
0110: public final class httpdProxyHandler {
0111:
0112: // static variables
0113: // can only be instantiated upon first instantiation of this class object
0114: private static plasmaSwitchboard switchboard = null;
0115: public static HashSet<String> yellowList = null;
0116: private static int timeout = 30000;
0117: private static boolean yacyTrigger = true;
0118: public static boolean isTransparentProxy = false;
0119: private static Process redirectorProcess;
0120: private static boolean redirectorEnabled = false;
0121: private static PrintWriter redirectorWriter;
0122: private static BufferedReader redirectorReader;
0123:
0124: private static htmlFilterTransformer transformer = null;
0125: public static final String proxyUserAgent = "yacy ("
0126: + httpc.systemOST + ") yacy.net";
0127: public static final String crawlerUserAgent = "yacybot ("
0128: + httpc.systemOST + ") http://yacy.net/bot.html";
0129: private static File htRootPath = null;
0130:
0131: //private Properties connectionProperties = null;
0132: private static serverLog theLogger;
0133:
0134: private static boolean doAccessLogging = false;
0135: /**
0136: * Do logging configuration for special proxy access log file
0137: */
0138: static {
0139: // Doing logger initialisation
0140: try {
0141: serverLog.logInfo("PROXY",
0142: "Configuring proxy access logging ...");
0143:
0144: // getting the logging manager
0145: LogManager manager = LogManager.getLogManager();
0146: String className = httpdProxyHandler.class.getName();
0147:
0148: // determining if proxy access logging is enabled
0149: String enabled = manager
0150: .getProperty("de.anomic.http.httpdProxyHandler.logging.enabled");
0151: if ("true".equalsIgnoreCase(enabled)) {
0152:
0153: // reading out some needed configuration properties
0154: int limit = 1024 * 1024, count = 20;
0155: String pattern = manager.getProperty(className
0156: + ".logging.FileHandler.pattern");
0157: if (pattern == null)
0158: pattern = "DATA/LOG/proxyAccess%u%g.log";
0159:
0160: String limitStr = manager.getProperty(className
0161: + ".logging.FileHandler.limit");
0162: if (limitStr != null)
0163: try {
0164: limit = Integer.valueOf(limitStr).intValue();
0165: } catch (NumberFormatException e) {
0166: }
0167:
0168: String countStr = manager.getProperty(className
0169: + ".logging.FileHandler.count");
0170: if (countStr != null)
0171: try {
0172: count = Integer.valueOf(countStr).intValue();
0173: } catch (NumberFormatException e) {
0174: }
0175:
0176: // creating the proxy access logger
0177: Logger proxyLogger = Logger.getLogger("PROXY.access");
0178: proxyLogger.setUseParentHandlers(false);
0179: proxyLogger.setLevel(Level.FINEST);
0180:
0181: FileHandler txtLog = new FileHandler(pattern, limit,
0182: count, true);
0183: txtLog.setFormatter(new serverMiniLogFormatter());
0184: txtLog.setLevel(Level.FINEST);
0185: proxyLogger.addHandler(txtLog);
0186:
0187: doAccessLogging = true;
0188: serverLog.logInfo("PROXY",
0189: "Proxy access logging configuration done."
0190: + "\n\tFilename: " + pattern
0191: + "\n\tLimit: " + limitStr
0192: + "\n\tCount: " + countStr);
0193: } else {
0194: serverLog.logInfo("PROXY",
0195: "Proxy access logging is deactivated.");
0196: }
0197: } catch (Exception e) {
0198: serverLog.logSevere("PROXY",
0199: "Unable to configure proxy access logging.", e);
0200: }
0201:
0202: switchboard = plasmaSwitchboard.getSwitchboard();
0203:
0204: // creating a logger
0205: theLogger = new serverLog("PROXY");
0206:
0207: isTransparentProxy = Boolean.valueOf(
0208: switchboard.getConfig("isTransparentProxy", "false"))
0209: .booleanValue();
0210:
0211: // set timeout
0212: timeout = Integer.parseInt(switchboard.getConfig(
0213: "proxy.clientTimeout", "10000"));
0214:
0215: // create a htRootPath: system pages
0216: htRootPath = new File(switchboard.getRootPath(), switchboard
0217: .getConfig("htRootPath", "htroot"));
0218: if (!(htRootPath.exists()))
0219: htRootPath.mkdir();
0220:
0221: // load a transformer
0222: transformer = new htmlFilterContentTransformer();
0223: transformer.init(new File(switchboard.getRootPath(),
0224: switchboard.getConfig(plasmaSwitchboard.LIST_BLUE, ""))
0225: .toString());
0226:
0227: String f;
0228: // load the yellow-list
0229: f = switchboard.getConfig("proxyYellowList", null);
0230: if (f != null) {
0231: yellowList = serverFileUtils.loadList(new File(f));
0232: theLogger.logConfig("loaded yellow-list from file " + f
0233: + ", " + yellowList.size() + " entries");
0234: } else {
0235: yellowList = new HashSet<String>();
0236: }
0237:
0238: String redirectorPath = switchboard.getConfig(
0239: "externalRedirector", "");
0240: if (redirectorPath.length() > 0 && redirectorEnabled == false) {
0241: try {
0242: redirectorProcess = Runtime.getRuntime().exec(
0243: redirectorPath);
0244: redirectorWriter = new PrintWriter(redirectorProcess
0245: .getOutputStream());
0246: redirectorReader = new BufferedReader(
0247: new InputStreamReader(redirectorProcess
0248: .getInputStream()));
0249: redirectorEnabled = true;
0250: } catch (IOException e) {
0251: System.out.println("redirector not Found");
0252: }
0253: }
0254: }
0255:
0256: /**
0257: * Special logger instance for proxy access logging much similar
0258: * to the squid access.log file
0259: */
0260: private static final serverLog proxyLog = new serverLog(
0261: "PROXY.access");
0262:
0263: /**
0264: * Reusable {@link StringBuffer} for logging
0265: */
0266: private static final StringBuffer logMessage = new StringBuffer();
0267:
0268: /**
0269: * Reusable {@link StringBuffer} to generate the useragent string
0270: */
0271: private static final StringBuffer userAgentStr = new StringBuffer();
0272:
0273: private static String domain(String host) {
0274: String domain = host;
0275: int pos = domain.lastIndexOf(".");
0276: if (pos >= 0) {
0277: // truncate from last part
0278: domain = domain.substring(0, pos);
0279: pos = domain.lastIndexOf(".");
0280: if (pos >= 0) {
0281: // truncate from first part
0282: domain = domain.substring(pos + 1);
0283: }
0284: }
0285: return domain;
0286: }
0287:
0288: public static void handleOutgoingCookies(httpHeader requestHeader,
0289: String targethost, String clienthost) {
0290: /*
0291: The syntax for the header is:
0292:
0293: cookie = "Cookie:" cookie-version
0294: 1*((";" | ",") cookie-value)
0295: cookie-value = NAME "=" VALUE [";" path] [";" domain]
0296: cookie-version = "$Version" "=" value
0297: NAME = attr
0298: VALUE = value
0299: path = "$Path" "=" value
0300: domain = "$Domain" "=" value
0301: */
0302: if (requestHeader.containsKey(httpHeader.COOKIE)) {
0303: Object[] entry = new Object[] { new Date(), clienthost,
0304: requestHeader.getMultiple(httpHeader.COOKIE) };
0305: switchboard.outgoingCookies.put(targethost, entry);
0306: }
0307: }
0308:
0309: public static void handleIncomingCookies(httpHeader respondHeader,
0310: String serverhost, String targetclient) {
0311: /*
0312: The syntax for the Set-Cookie response header is
0313:
0314: set-cookie = "Set-Cookie:" cookies
0315: cookies = 1#cookie
0316: cookie = NAME "=" VALUE *(";" cookie-av)
0317: NAME = attr
0318: VALUE = value
0319: cookie-av = "Comment" "=" value
0320: | "Domain" "=" value
0321: | "Max-Age" "=" value
0322: | "Path" "=" value
0323: | "Secure"
0324: | "Version" "=" 1*DIGIT
0325: */
0326: if (respondHeader.containsKey(httpHeader.SET_COOKIE)) {
0327: Object[] entry = new Object[] { new Date(), targetclient,
0328: respondHeader.getMultiple(httpHeader.SET_COOKIE) };
0329: switchboard.incomingCookies.put(serverhost, entry);
0330: }
0331: }
0332:
0333: /**
0334: * @param conProp a collection of properties about the connection, like URL
0335: * @param requestHeader The header lines of the connection from the request
0336: * @param respond the OutputStream to the client
0337: * @see de.anomic.http.httpdHandler#doGet(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream)
0338: */
0339: public static void doGet(Properties conProp,
0340: httpHeader requestHeader, OutputStream respond) {
0341:
0342: try {
0343: // remembering the starting time of the request
0344: final Date requestDate = new Date(); // remember the time...
0345: conProp.put(httpHeader.CONNECTION_PROP_REQUEST_START,
0346: new Long(requestDate.getTime()));
0347: if (yacyTrigger)
0348: de.anomic.yacy.yacyCore.triggerOnlineAction();
0349: switchboard.proxyLastAccess = System.currentTimeMillis();
0350:
0351: // using an ByteCount OutputStream to count the send bytes (needed for the logfile)
0352: respond = new httpdByteCountOutputStream(respond,
0353: conProp.getProperty(
0354: httpHeader.CONNECTION_PROP_REQUESTLINE)
0355: .length() + 2, "PROXY");
0356:
0357: String host = conProp
0358: .getProperty(httpHeader.CONNECTION_PROP_HOST);
0359: String path = conProp
0360: .getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/'
0361: final String args = conProp
0362: .getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
0363: final String ip = conProp
0364: .getProperty(httpHeader.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
0365: int pos = 0;
0366: int port = 0;
0367:
0368: yacyURL url = null;
0369: try {
0370: url = httpHeader.getRequestURL(conProp);
0371:
0372: //redirector
0373: if (redirectorEnabled) {
0374: synchronized (redirectorProcess) {
0375: redirectorWriter.println(url.toNormalform(
0376: false, true));
0377: redirectorWriter.flush();
0378: }
0379: String newUrl = redirectorReader.readLine();
0380: if (!newUrl.equals("")) {
0381: try {
0382: url = new yacyURL(newUrl, null);
0383: } catch (MalformedURLException e) {
0384: }//just keep the old one
0385: }
0386: conProp.setProperty(
0387: httpHeader.CONNECTION_PROP_HOST, url
0388: .getHost()
0389: + ":" + url.getPort());
0390: conProp.setProperty(
0391: httpHeader.CONNECTION_PROP_PATH, url
0392: .getPath());
0393: requestHeader.put(httpHeader.HOST, url.getHost()
0394: + ":" + url.getPort());
0395: requestHeader.put(httpHeader.CONNECTION_PROP_PATH,
0396: url.getPath());
0397: }
0398: } catch (MalformedURLException e) {
0399: String errorMsg = "ERROR: internal error with url generation: host="
0400: + host
0401: + ", port="
0402: + port
0403: + ", path="
0404: + path
0405: + ", args=" + args;
0406: serverLog.logSevere("PROXY", errorMsg);
0407: httpd.sendRespondError(conProp, respond, 4, 501, null,
0408: errorMsg, e);
0409: return;
0410: }
0411:
0412: if ((pos = host.indexOf(":")) < 0) {
0413: port = 80;
0414: } else {
0415: port = Integer.parseInt(host.substring(pos + 1));
0416: host = host.substring(0, pos);
0417: }
0418:
0419: String ext;
0420: if ((pos = path.lastIndexOf('.')) < 0) {
0421: ext = "";
0422: } else {
0423: ext = path.substring(pos + 1).toLowerCase();
0424: }
0425:
0426: // check the blacklist
0427: // blacklist idea inspired by [AS]:
0428: // respond a 404 for all AGIS ("all you get is shit") servers
0429: String hostlow = host.toLowerCase();
0430: if (args != null) {
0431: path = path + "?" + args;
0432: }
0433: if (plasmaSwitchboard.urlBlacklist.isListed(
0434: plasmaURLPattern.BLACKLIST_PROXY, hostlow, path)) {
0435: httpd
0436: .sendRespondError(
0437: conProp,
0438: respond,
0439: 4,
0440: 403,
0441: null,
0442: "URL '"
0443: + hostlow
0444: + "' blocked by yacy proxy (blacklisted)",
0445: null);
0446: theLogger.logInfo("AGIS blocking of host '" + hostlow
0447: + "'");
0448: return;
0449: }
0450:
0451: // handle outgoing cookies
0452: handleOutgoingCookies(requestHeader, host, ip);
0453:
0454: // set another userAgent, if not yellowlisted
0455: if ((yellowList != null)
0456: && (!(yellowList.contains(domain(hostlow))))) {
0457: // change the User-Agent
0458: requestHeader.put(httpHeader.USER_AGENT,
0459: generateUserAgent(requestHeader));
0460: }
0461:
0462: // setting the X-Forwarded-For Header
0463: if (switchboard.getConfigBool(
0464: "proxy.sendXForwardedForHeader", true)) {
0465: requestHeader
0466: .put(
0467: httpHeader.X_FORWARDED_FOR,
0468: conProp
0469: .getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
0470: }
0471:
0472: // decide wether to use a cache entry or connect to the network
0473: File cacheFile = plasmaHTCache.getCachePath(url);
0474:
0475: httpHeader cachedResponseHeader = null;
0476: ResourceInfo cachedResInfo = (ResourceInfo) plasmaHTCache
0477: .loadResourceInfo(url);
0478: if (cachedResInfo != null) {
0479: // set the new request header (needed by function shallUseCacheForProxy)
0480: cachedResInfo.setRequestHeader(requestHeader);
0481:
0482: // get the cached response header
0483: cachedResponseHeader = cachedResInfo
0484: .getResponseHeader();
0485: }
0486: boolean cacheExists = cacheFile.isFile()
0487: && (cachedResponseHeader != null);
0488:
0489: // why are files unzipped upon arrival? why not zip all files in cache?
0490: // This follows from the following premises
0491: // (a) no file shall be unzip-ed more than once to prevent unnessesary computing time
0492: // (b) old cache entries shall be comparable with refill-entries to detect/distiguish case 3+4
0493: // (c) the indexing mechanism needs files unzip-ed, a schedule could do that later
0494: // case b and c contradicts, if we use a scheduler, because files in a stale cache would be unzipped
0495: // and the newly arrival would be zipped and would have to be unzipped upon load. But then the
0496: // scheduler is superfluous. Therefore the only reminding case is
0497: // (d) cached files shall be either all zipped or unzipped
0498: // case d contradicts with a, because files need to be unzipped for indexing. Therefore
0499: // the only remaining case is to unzip files right upon load. Thats what we do here.
0500:
0501: // finally use existing cache if appropriate
0502: // here we must decide weather or not to save the data
0503: // to a cache
0504: // we distinguish four CACHE STATE cases:
0505: // 1. cache fill
0506: // 2. cache fresh - no refill
0507: // 3. cache stale - refill - necessary
0508: // 4. cache stale - refill - superfluous
0509: // in two of these cases we trigger a scheduler to handle newly arrived files:
0510: // case 1 and case 3
0511: plasmaHTCache.Entry cacheEntry = (cachedResponseHeader == null) ? null
0512: : plasmaHTCache.newEntry(requestDate, // init date
0513: 0, // crawling depth
0514: url, // url
0515: "", // name of the url is unknown
0516: //requestHeader, // request headers
0517: "200 OK", // request status
0518: //cachedResponseHeader, // response headers
0519: cachedResInfo, null, // initiator
0520: switchboard.defaultProxyProfile // profile
0521: );
0522: if (yacyCore.getOnlineMode() == 0) {
0523: if (cacheExists) {
0524: fulfillRequestFromCache(conProp, url, ext,
0525: requestHeader, cachedResponseHeader,
0526: cacheFile, respond);
0527: } else {
0528: httpd.sendRespondError(conProp, respond, 4, 404,
0529: null, "URL not availabe in Cache", null);
0530: }
0531: } else if (cacheExists
0532: && cacheEntry.shallUseCacheForProxy()) {
0533: fulfillRequestFromCache(conProp, url, ext,
0534: requestHeader, cachedResponseHeader, cacheFile,
0535: respond);
0536: } else {
0537: fulfillRequestFromWeb(conProp, url, ext, requestHeader,
0538: cachedResponseHeader, cacheFile, respond);
0539: }
0540:
0541: } catch (Exception e) {
0542: try {
0543: String exTxt = e.getMessage();
0544: if ((exTxt != null)
0545: && (exTxt.startsWith("Socket closed"))) {
0546: forceConnectionClose(conProp);
0547: } else if (!conProp
0548: .containsKey(httpHeader.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
0549: String errorMsg = "Unexpected Error. "
0550: + e.getClass().getName() + ": "
0551: + e.getMessage();
0552: httpd.sendRespondError(conProp, respond, 4, 501,
0553: null, errorMsg, e);
0554: theLogger.logSevere(errorMsg);
0555: } else {
0556: forceConnectionClose(conProp);
0557: }
0558: } catch (Exception ee) {
0559: forceConnectionClose(conProp);
0560: }
0561: } finally {
0562: try {
0563: respond.flush();
0564: } catch (Exception e) {
0565: }
0566: if (respond instanceof httpdByteCountOutputStream)
0567: ((httpdByteCountOutputStream) respond).finish();
0568:
0569: conProp.put(httpHeader.CONNECTION_PROP_REQUEST_END,
0570: new Long(System.currentTimeMillis()));
0571: conProp.put(httpHeader.CONNECTION_PROP_PROXY_RESPOND_SIZE,
0572: new Long(((httpdByteCountOutputStream) respond)
0573: .getCount()));
0574: logProxyAccess(conProp);
0575: }
0576: }
0577:
0578: private static void fulfillRequestFromWeb(Properties conProp,
0579: yacyURL url, String ext, httpHeader requestHeader,
0580: httpHeader cachedResponseHeader, File cacheFile,
0581: OutputStream respond) {
0582:
0583: GZIPOutputStream gzippedOut = null;
0584: httpChunkedOutputStream chunkedOut = null;
0585: Object hfos = null;
0586:
0587: httpc remote = null;
0588: httpc.response res = null;
0589: try {
0590:
0591: String host = conProp
0592: .getProperty(httpHeader.CONNECTION_PROP_HOST);
0593: String path = conProp
0594: .getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/'
0595: String args = conProp
0596: .getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
0597: String ip = conProp
0598: .getProperty(httpHeader.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
0599: String httpVer = conProp
0600: .getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); // the ip from the connecting peer
0601:
0602: int port, pos;
0603: if ((pos = host.indexOf(":")) < 0) {
0604: port = 80;
0605: } else {
0606: port = Integer.parseInt(host.substring(pos + 1));
0607: host = host.substring(0, pos);
0608: }
0609:
0610: // resolve yacy and yacyh domains
0611: String yAddress = yacyCore.seedDB.resolveYacyAddress(host);
0612:
0613: // re-calc the url path
0614: String remotePath = (args == null) ? path
0615: : (path + "?" + args); // with leading '/'
0616:
0617: // attach possible yacy-sublevel-domain
0618: if ((yAddress != null)
0619: && ((pos = yAddress.indexOf("/")) >= 0)
0620: && (!(remotePath.startsWith("/env"))) // this is the special path, staying always at root-level
0621: )
0622: remotePath = yAddress.substring(pos) + remotePath;
0623:
0624: // open the connection
0625: remote = (yAddress == null) ? newhttpc(host, port, timeout)
0626: : newhttpc(yAddress, timeout);
0627:
0628: // removing hop by hop headers
0629: removeHopByHopHeaders(requestHeader);
0630:
0631: // adding additional headers
0632: setViaHeader(requestHeader, httpVer);
0633:
0634: // send request
0635: res = remote.GET(remotePath, requestHeader);
0636: conProp.put(
0637: httpHeader.CONNECTION_PROP_CLIENT_REQUEST_HEADER,
0638: requestHeader);
0639:
0640: // determine if it's an internal error of the httpc
0641: if (res.responseHeader.size() == 0) {
0642: throw new Exception(res.statusText);
0643: }
0644:
0645: // if the content length is not set we have to use chunked transfer encoding
0646: long contentLength = res.responseHeader.contentLength();
0647: if (contentLength < 0) {
0648: // according to http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
0649: // a 204,304 message must not contain a message body.
0650: // Therefore we need to set the content-length to 0.
0651: if (res.status.startsWith("204")
0652: || res.status.startsWith("304")) {
0653: res.responseHeader.put(httpHeader.CONTENT_LENGTH,
0654: "0");
0655: } else {
0656: if (httpVer.equals(httpHeader.HTTP_VERSION_0_9)
0657: || httpVer
0658: .equals(httpHeader.HTTP_VERSION_1_0)) {
0659: conProp.setProperty(
0660: httpHeader.CONNECTION_PROP_PERSISTENT,
0661: "close");
0662: } else {
0663: chunkedOut = new httpChunkedOutputStream(
0664: respond);
0665: }
0666: res.responseHeader
0667: .remove(httpHeader.CONTENT_LENGTH);
0668: }
0669: }
0670:
0671: // if (((String)requestHeader.get(httpHeader.ACCEPT_ENCODING,"")).indexOf("gzip") != -1) {
0672: // zipped = new GZIPOutputStream((chunked != null) ? chunked : respond);
0673: // res.responseHeader.put(httpHeader.CONTENT_ENCODING, "gzip");
0674: // res.responseHeader.remove(httpHeader.CONTENT_LENGTH);
0675: // }
0676:
0677: // the cache does either not exist or is (supposed to be) stale
0678: long sizeBeforeDelete = -1;
0679: if ((cacheFile.isFile()) && (cachedResponseHeader != null)) {
0680: // delete the cache
0681: sizeBeforeDelete = cacheFile.length();
0682: plasmaHTCache.deleteURLfromCache(url);
0683: conProp.setProperty(
0684: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0685: "TCP_REFRESH_MISS");
0686: }
0687:
0688: // reserver cache entry
0689: Date requestDate = new Date(((Long) conProp
0690: .get(httpHeader.CONNECTION_PROP_REQUEST_START))
0691: .longValue());
0692: IResourceInfo resInfo = new ResourceInfo(url,
0693: requestHeader, res.responseHeader);
0694: plasmaHTCache.Entry cacheEntry = plasmaHTCache.newEntry(
0695: requestDate, 0, url, "",
0696: //requestHeader,
0697: res.status,
0698: //res.responseHeader,
0699: resInfo, null, switchboard.defaultProxyProfile);
0700:
0701: // handle file types and make (possibly transforming) output stream
0702: if ((!transformer.isIdentityTransformer())
0703: && (plasmaParser.supportedHTMLContent(url,
0704: res.responseHeader.mime()))) {
0705: // make a transformer
0706: theLogger.logFine("create transformer for URL " + url);
0707: //hfos = new htmlFilterOutputStream((gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond), null, transformer, (ext.length() == 0));
0708: String charSet = res.responseHeader
0709: .getCharacterEncoding();
0710: if (charSet == null)
0711: charSet = httpHeader.DEFAULT_CHARSET;
0712: hfos = new htmlFilterWriter(
0713: (gzippedOut != null) ? gzippedOut
0714: : ((chunkedOut != null) ? chunkedOut
0715: : respond), charSet, null,
0716: transformer, (ext.length() == 0));
0717: } else {
0718: // simply pass through without parsing
0719: theLogger.logFine("create passthrough for URL " + url
0720: + ", extension '" + ext + "', mime-type '"
0721: + res.responseHeader.mime() + "'");
0722: hfos = (gzippedOut != null) ? gzippedOut
0723: : ((chunkedOut != null) ? chunkedOut : respond);
0724: }
0725:
0726: // handle incoming cookies
0727: handleIncomingCookies(res.responseHeader, host, ip);
0728:
0729: // remove hop by hop headers
0730: removeHopByHopHeaders(res.responseHeader);
0731:
0732: // adding additional headers
0733: setViaHeader(res.responseHeader, res.httpVer);
0734:
0735: // sending the respond header back to the client
0736: if (chunkedOut != null) {
0737: res.responseHeader.put(httpHeader.TRANSFER_ENCODING,
0738: "chunked");
0739: }
0740:
0741: httpd.sendRespondHeader(conProp, respond, httpVer,
0742: res.statusCode, res.statusText, res.responseHeader);
0743:
0744: String storeError = cacheEntry.shallStoreCacheForProxy();
0745: boolean storeHTCache = cacheEntry.profile().storeHTCache();
0746: boolean isSupportedContent = plasmaParser.supportedContent(
0747: plasmaParser.PARSER_MODE_PROXY, cacheEntry.url(),
0748: cacheEntry.getMimeType());
0749: if (
0750: /*
0751: * Now we store the response into the htcache directory if
0752: * a) the response is cacheable AND
0753: */
0754: (storeError == null) &&
0755: /*
0756: * b) the user has configured to use the htcache OR
0757: * c) the content should be indexed
0758: */
0759: ((storeHTCache) || (isSupportedContent))) {
0760: // we write a new cache entry
0761: if ((contentLength > 0) && (contentLength < 1048576)) // if the length is known and < 1 MB
0762: {
0763: // ok, we don't write actually into a file, only to RAM, and schedule writing the file.
0764: byte[] cacheArray = res.writeContent(hfos, true);
0765: theLogger.logFine("writeContent of "
0766: + url
0767: + " produced cacheArray = "
0768: + ((cacheArray == null) ? "null"
0769: : ("size=" + cacheArray.length)));
0770:
0771: if (hfos instanceof htmlFilterWriter)
0772: ((htmlFilterWriter) hfos).finalize();
0773:
0774: if (sizeBeforeDelete == -1) {
0775: // totally fresh file
0776: //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
0777: cacheEntry.setCacheArray(cacheArray);
0778: plasmaHTCache.push(cacheEntry);
0779: conProp
0780: .setProperty(
0781: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0782: "TCP_MISS");
0783: } else if (sizeBeforeDelete == cacheArray.length) {
0784: // before we came here we deleted a cache entry
0785: cacheArray = null;
0786: //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD;
0787: //cacheManager.push(cacheEntry); // unnecessary update
0788: conProp
0789: .setProperty(
0790: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0791: "TCP_REF_FAIL_HIT");
0792: } else {
0793: // before we came here we deleted a cache entry
0794: //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD;
0795: cacheEntry.setCacheArray(cacheArray);
0796: plasmaHTCache.push(cacheEntry); // necessary update, write response header to cache
0797: conProp
0798: .setProperty(
0799: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0800: "TCP_REFRESH_MISS");
0801: }
0802: } else {
0803: // the file is too big to cache it in the ram, or the size is unknown
0804: // write to file right here.
0805: cacheFile.getParentFile().mkdirs();
0806: res.writeContent(hfos, cacheFile);
0807: if (hfos instanceof htmlFilterWriter)
0808: ((htmlFilterWriter) hfos).finalize();
0809: theLogger.logFine("for write-file of " + url
0810: + ": contentLength = " + contentLength
0811: + ", sizeBeforeDelete = "
0812: + sizeBeforeDelete);
0813: plasmaHTCache.writeFileAnnouncement(cacheFile);
0814: if (sizeBeforeDelete == -1) {
0815: // totally fresh file
0816: //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
0817: plasmaHTCache.push(cacheEntry);
0818: conProp
0819: .setProperty(
0820: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0821: "TCP_MISS");
0822: } else if (sizeBeforeDelete == cacheFile.length()) {
0823: // before we came here we deleted a cache entry
0824: //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD;
0825: //cacheManager.push(cacheEntry); // unnecessary update
0826: conProp
0827: .setProperty(
0828: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0829: "TCP_REF_FAIL_HIT");
0830: } else {
0831: // before we came here we deleted a cache entry
0832: //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD;
0833: plasmaHTCache.push(cacheEntry); // necessary update, write response header to cache
0834: conProp
0835: .setProperty(
0836: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0837: "TCP_REFRESH_MISS");
0838: }
0839: // beware! all these writings will not fill the cacheEntry.cacheArray
0840: // that means they are not available for the indexer (except they are scraped before)
0841: }
0842: } else {
0843: // no caching
0844: theLogger.logFine(cacheFile.toString() + " not cached."
0845: + " StoreError="
0846: + ((storeError == null) ? "None" : storeError)
0847: + " StoreHTCache=" + storeHTCache
0848: + " SupportetContent=" + isSupportedContent);
0849:
0850: res.writeContent(hfos, null);
0851: if (hfos instanceof htmlFilterWriter)
0852: ((htmlFilterWriter) hfos).finalize();
0853: if (sizeBeforeDelete == -1) {
0854: // no old file and no load. just data passing
0855: //cacheEntry.status = plasmaHTCache.CACHE_PASSING;
0856: //cacheManager.push(cacheEntry);
0857: } else {
0858: // before we came here we deleted a cache entry
0859: //cacheEntry.status = plasmaHTCache.CACHE_STALE_NO_RELOAD;
0860: //cacheManager.push(cacheEntry);
0861: }
0862: conProp.setProperty(
0863: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0864: "TCP_MISS");
0865: }
0866:
0867: remote.close();
0868:
0869: if (gzippedOut != null) {
0870: gzippedOut.finish();
0871: }
0872: if (chunkedOut != null) {
0873: chunkedOut.finish();
0874: chunkedOut.flush();
0875: }
0876: } catch (Exception e) {
0877: // deleting cached content
0878: if (cacheFile.exists())
0879: cacheFile.delete();
0880: handleProxyException(e, remote, conProp, respond, url);
0881: }
0882: }
0883:
0884: private static void fulfillRequestFromCache(Properties conProp,
0885: yacyURL url, String ext, httpHeader requestHeader,
0886: httpHeader cachedResponseHeader, File cacheFile,
0887: OutputStream respond) throws IOException {
0888:
0889: String httpVer = conProp
0890: .getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
0891:
0892: httpChunkedOutputStream chunkedOut = null;
0893: GZIPOutputStream gzippedOut = null;
0894: Object hfos = null;
0895:
0896: // we respond on the request by using the cache, the cache is fresh
0897: try {
0898: // remove hop by hop headers
0899: removeHopByHopHeaders(cachedResponseHeader);
0900:
0901: // adding additional headers
0902: setViaHeader(cachedResponseHeader, httpVer);
0903:
0904: // replace date field in old header by actual date, this is according to RFC
0905: cachedResponseHeader.put(httpHeader.DATE, httpc
0906: .dateString(httpc.nowDate()));
0907:
0908: // if (((String)requestHeader.get(httpHeader.ACCEPT_ENCODING,"")).indexOf("gzip") != -1) {
0909: // chunked = new httpChunkedOutputStream(respond);
0910: // zipped = new GZIPOutputStream(chunked);
0911: // cachedResponseHeader.put(httpHeader.TRANSFER_ENCODING, "chunked");
0912: // cachedResponseHeader.put(httpHeader.CONTENT_ENCODING, "gzip");
0913: // } else {
0914: // maybe the content length is missing
0915: // if (!(cachedResponseHeader.containsKey(httpHeader.CONTENT_LENGTH)))
0916: // cachedResponseHeader.put(httpHeader.CONTENT_LENGTH, Long.toString(cacheFile.length()));
0917: // }
0918:
0919: // check if we can send a 304 instead the complete content
0920: if (requestHeader.containsKey(httpHeader.IF_MODIFIED_SINCE)) {
0921: // conditional request: freshness of cache for that condition was already
0922: // checked within shallUseCache(). Now send only a 304 response
0923: theLogger.logInfo("CACHE HIT/304 "
0924: + cacheFile.toString());
0925: conProp.setProperty(
0926: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0927: "TCP_REFRESH_HIT");
0928:
0929: // setting the content length header to 0
0930: cachedResponseHeader.put(httpHeader.CONTENT_LENGTH,
0931: Integer.toString(0));
0932:
0933: // send cached header with replaced date and added length
0934: httpd.sendRespondHeader(conProp, respond, httpVer, 304,
0935: cachedResponseHeader);
0936: //respondHeader(respond, "304 OK", cachedResponseHeader); // respond with 'not modified'
0937: } else {
0938: // unconditional request: send content of cache
0939: theLogger.logInfo("CACHE HIT/203 "
0940: + cacheFile.toString());
0941: conProp.setProperty(
0942: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
0943: "TCP_HIT");
0944:
0945: // setting the content header to the proper length
0946: cachedResponseHeader.put(httpHeader.CONTENT_LENGTH,
0947: Long.toString(cacheFile.length()));
0948:
0949: // send cached header with replaced date and added length
0950: httpd.sendRespondHeader(conProp, respond, httpVer, 203,
0951: cachedResponseHeader);
0952: //respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative'
0953:
0954: // determine the content charset
0955: String charSet = cachedResponseHeader
0956: .getCharacterEncoding();
0957: if (charSet == null)
0958: charSet = httpHeader.DEFAULT_CHARSET;
0959:
0960: // make a transformer
0961: if ((!transformer.isIdentityTransformer())
0962: && (ext == null || !plasmaParser
0963: .supportedHTMLFileExtContains(url))
0964: && (plasmaParser
0965: .HTMLParsableMimeTypesContains(cachedResponseHeader
0966: .mime()))) {
0967: hfos = new htmlFilterWriter(
0968: (chunkedOut != null) ? chunkedOut : respond,
0969: charSet, null, transformer,
0970: (ext.length() == 0));
0971: } else {
0972: hfos = (gzippedOut != null) ? gzippedOut
0973: : ((chunkedOut != null) ? chunkedOut
0974: : respond);
0975: }
0976:
0977: // send also the complete body now from the cache
0978: // simply read the file and transfer to out socket
0979: if (hfos instanceof OutputStream) {
0980: serverFileUtils
0981: .copy(cacheFile, (OutputStream) hfos);
0982: } else if (hfos instanceof Writer) {
0983: serverFileUtils.copy(cacheFile, charSet,
0984: (Writer) hfos);
0985: }
0986:
0987: if (hfos instanceof htmlFilterWriter)
0988: ((htmlFilterWriter) hfos).finalize();
0989: if (gzippedOut != null)
0990: gzippedOut.finish();
0991: if (chunkedOut != null)
0992: chunkedOut.finish();
0993: }
0994: // that's it!
0995: } catch (Exception e) {
0996: // this happens if the client stops loading the file
0997: // we do nothing here
0998: if (conProp
0999: .containsKey(httpHeader.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
1000: theLogger
1001: .logWarning("Error while trying to send cached message body.");
1002: conProp.setProperty(
1003: httpHeader.CONNECTION_PROP_PERSISTENT, "close");
1004: } else {
1005: httpd.sendRespondError(conProp, respond, 4, 503,
1006: "socket error: " + e.getMessage(),
1007: "socket error: " + e.getMessage(), e);
1008: }
1009: } finally {
1010: try {
1011: respond.flush();
1012: } catch (Exception e) {
1013: }
1014: }
1015: return;
1016: }
1017:
1018: private static void removeHopByHopHeaders(httpHeader headers) {
1019: /*
1020: - Trailers
1021: */
1022:
1023: headers.remove(httpHeader.CONNECTION);
1024: headers.remove(httpHeader.KEEP_ALIVE);
1025: headers.remove(httpHeader.UPGRADE);
1026: headers.remove(httpHeader.TE);
1027: headers.remove(httpHeader.PROXY_CONNECTION);
1028: headers.remove(httpHeader.PROXY_AUTHENTICATE);
1029: headers.remove(httpHeader.PROXY_AUTHORIZATION);
1030:
1031: // special headers inserted by squid
1032: headers.remove(httpHeader.X_CACHE);
1033: headers.remove(httpHeader.X_CACHE_LOOKUP);
1034:
1035: // remove transfer encoding header
1036: headers.remove(httpHeader.TRANSFER_ENCODING);
1037:
1038: //removing yacy status headers
1039: headers.remove(httpHeader.X_YACY_KEEP_ALIVE_REQUEST_COUNT);
1040: headers.remove(httpHeader.X_YACY_ORIGINAL_REQUEST_LINE);
1041: }
1042:
1043: private static void forceConnectionClose(Properties conProp) {
1044: if (conProp != null) {
1045: conProp.setProperty(httpHeader.CONNECTION_PROP_PERSISTENT,
1046: "close");
1047: }
1048: }
1049:
1050: public static void doHead(Properties conProp,
1051: httpHeader requestHeader, OutputStream respond) {
1052:
1053: httpc remote = null;
1054: httpc.response res = null;
1055: yacyURL url = null;
1056: try {
1057: // remembering the starting time of the request
1058: Date requestDate = new Date(); // remember the time...
1059: conProp.put(httpHeader.CONNECTION_PROP_REQUEST_START,
1060: new Long(requestDate.getTime()));
1061: if (yacyTrigger)
1062: de.anomic.yacy.yacyCore.triggerOnlineAction();
1063: switchboard.proxyLastAccess = System.currentTimeMillis();
1064:
1065: // using an ByteCount OutputStream to count the send bytes
1066: respond = new httpdByteCountOutputStream(respond,
1067: conProp.getProperty(
1068: httpHeader.CONNECTION_PROP_REQUESTLINE)
1069: .length() + 2, "PROXY");
1070:
1071: String host = conProp
1072: .getProperty(httpHeader.CONNECTION_PROP_HOST);
1073: String path = conProp
1074: .getProperty(httpHeader.CONNECTION_PROP_PATH);
1075: String args = conProp
1076: .getProperty(httpHeader.CONNECTION_PROP_ARGS);
1077: String httpVer = conProp
1078: .getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
1079:
1080: switchboard.proxyLastAccess = System.currentTimeMillis();
1081:
1082: int port, pos;
1083: if ((pos = host.indexOf(":")) < 0) {
1084: port = 80;
1085: } else {
1086: port = Integer.parseInt(host.substring(pos + 1));
1087: host = host.substring(0, pos);
1088: }
1089:
1090: try {
1091: url = new yacyURL("http", host, port,
1092: (args == null) ? path : path + "?" + args);
1093: } catch (MalformedURLException e) {
1094: String errorMsg = "ERROR: internal error with url generation: host="
1095: + host
1096: + ", port="
1097: + port
1098: + ", path="
1099: + path
1100: + ", args=" + args;
1101: serverLog.logSevere("PROXY", errorMsg);
1102: httpd.sendRespondError(conProp, respond, 4, 501, null,
1103: errorMsg, e);
1104: return;
1105: }
1106:
1107: // check the blacklist, inspired by [AS]: respond a 404 for all AGIS (all you get is shit) servers
1108: String hostlow = host.toLowerCase();
1109:
1110: // re-calc the url path
1111: String remotePath = (args == null) ? path
1112: : (path + "?" + args);
1113:
1114: if (plasmaSwitchboard.urlBlacklist.isListed(
1115: plasmaURLPattern.BLACKLIST_PROXY, hostlow,
1116: remotePath)) {
1117: httpd
1118: .sendRespondError(
1119: conProp,
1120: respond,
1121: 4,
1122: 403,
1123: null,
1124: "URL '"
1125: + hostlow
1126: + "' blocked by yacy proxy (blacklisted)",
1127: null);
1128: theLogger.logInfo("AGIS blocking of host '" + hostlow
1129: + "'");
1130: return;
1131: }
1132:
1133: // set another userAgent, if not yellowlisted
1134: if (!(yellowList.contains(domain(hostlow)))) {
1135: // change the User-Agent
1136: requestHeader.put(httpHeader.USER_AGENT,
1137: generateUserAgent(requestHeader));
1138: }
1139:
1140: // setting the X-Forwarded-For Header
1141: if (switchboard.getConfigBool(
1142: "proxy.sendXForwardedForHeader", true)) {
1143: requestHeader
1144: .put(
1145: httpHeader.X_FORWARDED_FOR,
1146: conProp
1147: .getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
1148: }
1149:
1150: // resolve yacy and yacyh domains
1151: String yAddress = yacyCore.seedDB.resolveYacyAddress(host);
1152:
1153: // attach possible yacy-sublevel-domain
1154: if ((yAddress != null)
1155: && ((pos = yAddress.indexOf("/")) >= 0))
1156: remotePath = yAddress.substring(pos) + remotePath;
1157:
1158: // removing hop by hop headers
1159: removeHopByHopHeaders(requestHeader);
1160:
1161: // adding outgoing headers
1162: setViaHeader(requestHeader, httpVer);
1163:
1164: // open the connection: second is needed for [AS] patch
1165: remote = (yAddress == null) ? newhttpc(host, port, timeout)
1166: : newhttpc(yAddress, timeout);
1167:
1168: // sending the http-HEAD request to the server
1169: res = remote.HEAD(remotePath, requestHeader);
1170:
1171: // determine if it's an internal error of the httpc
1172: if (res.responseHeader.size() == 0) {
1173: throw new Exception(res.statusText);
1174: }
1175:
1176: // removing hop by hop headers
1177: removeHopByHopHeaders(res.responseHeader);
1178:
1179: // adding outgoing headers
1180: setViaHeader(res.responseHeader, res.httpVer);
1181:
1182: // sending the server respond back to the client
1183: httpd.sendRespondHeader(conProp, respond, httpVer,
1184: res.statusCode, res.statusText, res.responseHeader);
1185: respond.flush();
1186: remote.close();
1187: } catch (Exception e) {
1188: handleProxyException(e, remote, conProp, respond, url);
1189: }
1190: }
1191:
1192: public static void doPost(Properties conProp,
1193: httpHeader requestHeader, OutputStream respond,
1194: PushbackInputStream body) throws IOException {
1195:
1196: httpc remote = null;
1197: yacyURL url = null;
1198: try {
1199: // remembering the starting time of the request
1200: Date requestDate = new Date(); // remember the time...
1201: conProp.put(httpHeader.CONNECTION_PROP_REQUEST_START,
1202: new Long(requestDate.getTime()));
1203: if (yacyTrigger)
1204: de.anomic.yacy.yacyCore.triggerOnlineAction();
1205: switchboard.proxyLastAccess = System.currentTimeMillis();
1206:
1207: // using an ByteCount OutputStream to count the send bytes
1208: respond = new httpdByteCountOutputStream(respond,
1209: conProp.getProperty(
1210: httpHeader.CONNECTION_PROP_REQUESTLINE)
1211: .length() + 2, "PROXY");
1212:
1213: String host = conProp
1214: .getProperty(httpHeader.CONNECTION_PROP_HOST);
1215: String path = conProp
1216: .getProperty(httpHeader.CONNECTION_PROP_PATH);
1217: String args = conProp
1218: .getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
1219: String httpVer = conProp
1220: .getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
1221:
1222: int port, pos;
1223: if ((pos = host.indexOf(":")) < 0) {
1224: port = 80;
1225: } else {
1226: port = Integer.parseInt(host.substring(pos + 1));
1227: host = host.substring(0, pos);
1228: }
1229:
1230: try {
1231: url = new yacyURL("http", host, port,
1232: (args == null) ? path : path + "?" + args);
1233: } catch (MalformedURLException e) {
1234: String errorMsg = "ERROR: internal error with url generation: host="
1235: + host
1236: + ", port="
1237: + port
1238: + ", path="
1239: + path
1240: + ", args=" + args;
1241: serverLog.logSevere("PROXY", errorMsg);
1242: httpd.sendRespondError(conProp, respond, 4, 501, null,
1243: errorMsg, e);
1244: return;
1245: }
1246:
1247: // set another userAgent, if not yellowlisted
1248: if (!(yellowList.contains(domain(host).toLowerCase()))) {
1249: // change the User-Agent
1250: requestHeader.put(httpHeader.USER_AGENT,
1251: generateUserAgent(requestHeader));
1252: }
1253:
1254: // setting the X-Forwarded-For Header
1255: if (switchboard.getConfigBool(
1256: "proxy.sendXForwardedForHeader", true)) {
1257: requestHeader
1258: .put(
1259: httpHeader.X_FORWARDED_FOR,
1260: conProp
1261: .getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
1262: }
1263:
1264: // resolve yacy and yacyh domains
1265: String yAddress = yacyCore.seedDB.resolveYacyAddress(host);
1266:
1267: // re-calc the url path
1268: String remotePath = (args == null) ? path
1269: : (path + "?" + args);
1270:
1271: // attach possible yacy-sublevel-domain
1272: if ((yAddress != null)
1273: && ((pos = yAddress.indexOf("/")) >= 0))
1274: remotePath = yAddress.substring(pos) + remotePath;
1275:
1276: // removing hop by hop headers
1277: removeHopByHopHeaders(requestHeader);
1278:
1279: // adding additional headers
1280: setViaHeader(requestHeader, httpVer);
1281:
1282: // sending the request
1283: remote = (yAddress == null) ? newhttpc(host, port, timeout)
1284: : newhttpc(yAddress, timeout);
1285: httpc.response res = remote.POST(remotePath, requestHeader,
1286: body);
1287:
1288: // determine if it's an internal error of the httpc
1289: if (res.responseHeader.size() == 0) {
1290: throw new Exception(res.statusText);
1291: }
1292:
1293: // if the content length is not set we need to use chunked content encoding
1294: long contentLength = res.responseHeader.contentLength();
1295: httpChunkedOutputStream chunked = null;
1296: if (contentLength <= 0) {
1297: // according to http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
1298: // a 204,304 message must not contain a message body.
1299: // Therefore we need to set the content-length to 0.
1300: if (res.status.startsWith("204")
1301: || res.status.startsWith("304")) {
1302: res.responseHeader.put(httpHeader.CONTENT_LENGTH,
1303: "0");
1304: } else {
1305: if (httpVer.equals("HTTP/0.9")
1306: || httpVer.equals("HTTP/1.0")) {
1307: forceConnectionClose(conProp);
1308: } else {
1309: chunked = new httpChunkedOutputStream(respond);
1310: }
1311: res.responseHeader
1312: .remove(httpHeader.CONTENT_LENGTH);
1313: }
1314: }
1315:
1316: // remove hop by hop headers
1317: removeHopByHopHeaders(res.responseHeader);
1318:
1319: // adding additional headers
1320: setViaHeader(res.responseHeader, res.httpVer);
1321:
1322: // sending the respond header back to the client
1323: if (chunked != null) {
1324: res.responseHeader.put(httpHeader.TRANSFER_ENCODING,
1325: "chunked");
1326: }
1327:
1328: // sending response headers
1329: httpd.sendRespondHeader(conProp, respond, httpVer,
1330: res.statusCode, res.statusText, res.responseHeader);
1331:
1332: // respondHeader(respond, res.status, res.responseHeader);
1333: res.writeContent((chunked != null) ? chunked : respond,
1334: null);
1335: if (chunked != null)
1336: chunked.finish();
1337:
1338: remote.close();
1339: respond.flush();
1340: } catch (Exception e) {
1341: handleProxyException(e, remote, conProp, respond, url);
1342: } finally {
1343: respond.flush();
1344: if (respond instanceof httpdByteCountOutputStream)
1345: ((httpdByteCountOutputStream) respond).finish();
1346:
1347: conProp.put(httpHeader.CONNECTION_PROP_REQUEST_END,
1348: new Long(System.currentTimeMillis()));
1349: conProp.put(httpHeader.CONNECTION_PROP_PROXY_RESPOND_SIZE,
1350: new Long(((httpdByteCountOutputStream) respond)
1351: .getCount()));
1352: logProxyAccess(conProp);
1353: }
1354: }
1355:
1356: public static void doConnect(Properties conProp,
1357: de.anomic.http.httpHeader requestHeader,
1358: InputStream clientIn, OutputStream clientOut)
1359: throws IOException {
1360:
1361: switchboard.proxyLastAccess = System.currentTimeMillis();
1362:
1363: String host = conProp
1364: .getProperty(httpHeader.CONNECTION_PROP_HOST);
1365: String httpVersion = conProp
1366: .getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
1367: String path = conProp
1368: .getProperty(httpHeader.CONNECTION_PROP_PATH);
1369: final String args = conProp
1370: .getProperty(httpHeader.CONNECTION_PROP_ARGS);
1371: if (args != null) {
1372: path = path + "?" + args;
1373: }
1374:
1375: int port, pos;
1376: if ((pos = host.indexOf(":")) < 0) {
1377: port = 80;
1378: } else {
1379: port = Integer.parseInt(host.substring(pos + 1));
1380: host = host.substring(0, pos);
1381: }
1382:
1383: // check the blacklist
1384: // blacklist idea inspired by [AS]:
1385: // respond a 404 for all AGIS ("all you get is shit") servers
1386: final String hostlow = host.toLowerCase();
1387: if (plasmaSwitchboard.urlBlacklist.isListed(
1388: plasmaURLPattern.BLACKLIST_PROXY, hostlow, path)) {
1389: httpd.sendRespondError(conProp, clientOut, 4, 403, null,
1390: "URL '" + hostlow
1391: + "' blocked by yacy proxy (blacklisted)",
1392: null);
1393: theLogger
1394: .logInfo("AGIS blocking of host '" + hostlow + "'");
1395: forceConnectionClose(conProp);
1396: return;
1397: }
1398:
1399: // possibly branch into PROXY-PROXY connection
1400: if ((switchboard.remoteProxyConfig != null)
1401: && (switchboard.remoteProxyConfig.useProxy())
1402: && (switchboard.remoteProxyConfig.useProxy4SSL())) {
1403: httpc remoteProxy = null;
1404: try {
1405: remoteProxy = new httpc(host, host, port, timeout,
1406: false, switchboard.remoteProxyConfig, null,
1407: null);
1408:
1409: httpc.response response = remoteProxy.CONNECT(host,
1410: port, requestHeader);
1411: response.print();
1412: if (response.success()) {
1413: // replace connection details
1414: host = switchboard.remoteProxyConfig.getProxyHost();
1415: port = switchboard.remoteProxyConfig.getProxyPort();
1416: remoteProxy.close();
1417: // go on (see below)
1418: } else {
1419: // pass error response back to client
1420: httpd.sendRespondHeader(conProp, clientOut,
1421: httpVersion, response.statusCode,
1422: response.statusText,
1423: response.responseHeader);
1424: //respondHeader(clientOut, response.status, response.responseHeader);
1425: forceConnectionClose(conProp);
1426: remoteProxy.close();
1427: return;
1428: }
1429: } catch (Exception e) {
1430: throw new IOException(e.getMessage());
1431: }
1432: }
1433:
1434: // try to establish connection to remote host
1435: Socket sslSocket = new Socket(host, port);
1436: sslSocket.setSoTimeout(timeout); // waiting time for write
1437: sslSocket.setSoLinger(true, timeout); // waiting time for read
1438: InputStream promiscuousIn = sslSocket.getInputStream();
1439: OutputStream promiscuousOut = sslSocket.getOutputStream();
1440:
1441: // now then we can return a success message
1442: clientOut.write((httpVersion + " 200 Connection established"
1443: + serverCore.CRLF_STRING + "Proxy-agent: YACY"
1444: + serverCore.CRLF_STRING + serverCore.CRLF_STRING)
1445: .getBytes());
1446:
1447: theLogger.logInfo("SSL connection to " + host + ":" + port
1448: + " established.");
1449:
1450: // start stream passing with mediate processes
1451: Mediate cs = new Mediate(sslSocket, clientIn, promiscuousOut);
1452: Mediate sc = new Mediate(sslSocket, promiscuousIn, clientOut);
1453: cs.start();
1454: sc.start();
1455: while ((sslSocket != null) && (sslSocket.isBound())
1456: && (!(sslSocket.isClosed()))
1457: && (sslSocket.isConnected())
1458: && ((cs.isAlive()) || (sc.isAlive()))) {
1459: // idle
1460: try {
1461: Thread.sleep(1000);
1462: } catch (InterruptedException e) {
1463: } // wait a while
1464: }
1465: // set stop mode
1466: cs.pleaseTerminate();
1467: sc.pleaseTerminate();
1468: // wake up thread
1469: cs.interrupt();
1470: sc.interrupt();
1471: // ...hope they have terminated...
1472: }
1473:
1474: public static class Mediate extends Thread {
1475:
1476: boolean terminate;
1477: Socket socket;
1478: InputStream in;
1479: OutputStream out;
1480:
1481: public Mediate(Socket socket, InputStream in, OutputStream out) {
1482: this .terminate = false;
1483: this .in = in;
1484: this .out = out;
1485: this .socket = socket;
1486: }
1487:
1488: public void run() {
1489: byte[] buffer = new byte[512];
1490: int len;
1491: try {
1492: while ((socket != null) && (socket.isBound())
1493: && (!(socket.isClosed()))
1494: && (socket.isConnected()) && (!(terminate))
1495: && (in != null) && (out != null)
1496: && ((len = in.read(buffer)) >= 0)) {
1497: out.write(buffer, 0, len);
1498: }
1499: } catch (IOException e) {
1500: }
1501: }
1502:
1503: public void pleaseTerminate() {
1504: terminate = true;
1505: }
1506: }
1507:
1508: private static httpc newhttpc(String server, int port, int timeout)
1509: throws IOException {
1510:
1511: // getting the remote proxy configuration
1512: httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig;
1513:
1514: // a new httpc connection, combined with possible remote proxy
1515: // check no-proxy rule
1516: if ((remProxyConfig != null)
1517: && (remProxyConfig.useProxy())
1518: && (!(remProxyConfig.remoteProxyAllowProxySet
1519: .contains(server)))) {
1520: if (remProxyConfig.remoteProxyDisallowProxySet
1521: .contains(server)) {
1522: remProxyConfig = null;
1523: } else {
1524: // analyse remoteProxyNoProxy;
1525: // set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly
1526: int i = 0;
1527: while (i < remProxyConfig.getProxyNoProxyPatterns().length) {
1528: if (server.matches(remProxyConfig
1529: .getProxyNoProxyPatterns()[i])) {
1530: // disallow proxy for this server
1531: switchboard.remoteProxyConfig.remoteProxyDisallowProxySet
1532: .add(server);
1533: remProxyConfig = null;
1534: break;
1535: }
1536: i++;
1537: }
1538: if (i == remProxyConfig.getProxyNoProxyPatterns().length) {
1539: // no pattern matches: allow server
1540: switchboard.remoteProxyConfig.remoteProxyAllowProxySet
1541: .add(server);
1542: }
1543: }
1544: }
1545:
1546: // branch to server/proxy
1547: return new httpc(server, server, port, timeout, false,
1548: remProxyConfig, null, null);
1549: }
1550:
1551: private static httpc newhttpc(String address, int timeout)
1552: throws IOException {
1553: // a new httpc connection for <host>:<port>/<path> syntax
1554: // this is called when a '.yacy'-domain is used
1555: int p = address.indexOf(":");
1556: if (p < 0)
1557: return null;
1558: String server = address.substring(0, p);
1559: address = address.substring(p + 1);
1560: // remove possible path elements (may occur for 'virtual' subdomains
1561: p = address.indexOf("/");
1562: if (p >= 0)
1563: address = address.substring(0, p); // cut it off
1564: int port = Integer.parseInt(address);
1565: // normal creation of httpc object
1566: return newhttpc(server, port, timeout);
1567: }
1568:
1569: /*
1570: private void textMessage(OutputStream out, String body) throws IOException {
1571: out.write(("HTTP/1.1 200 OK\r\n").getBytes());
1572: out.write((httpHeader.SERVER + ": AnomicHTTPD (www.anomic.de)\r\n").getBytes());
1573: out.write((httpHeader.DATE + ": " + httpc.dateString(httpc.nowDate()) + "\r\n").getBytes());
1574: out.write((httpHeader.CONTENT_TYPE + ": text/plain\r\n").getBytes());
1575: out.write((httpHeader.CONTENT_LENGTH + ": " + body.length() +"\r\n").getBytes());
1576: out.write(("\r\n").getBytes());
1577: out.flush();
1578: out.write(body.getBytes());
1579: out.flush();
1580: }
1581: */
1582: private static void handleProxyException(Exception e, httpc remote,
1583: Properties conProp, OutputStream respond, yacyURL url) {
1584: // this may happen if
1585: // - the targeted host does not exist
1586: // - anything with the remote server was wrong.
1587: // - the client unexpectedly closed the connection ...
1588: try {
1589:
1590: // doing some errorhandling ...
1591: int httpStatusCode = 404;
1592: String httpStatusText = null;
1593: String errorMessage = null;
1594: Exception errorExc = null;
1595: boolean unknownError = false;
1596:
1597: // for customized error messages
1598: boolean detailedErrorMsg = false;
1599: String detailedErrorMsgFile = null;
1600: serverObjects detailedErrorMsgMap = null;
1601:
1602: if (e instanceof ConnectException) {
1603: httpStatusCode = 403;
1604: httpStatusText = "Connection refused";
1605: errorMessage = "Connection refused by destination host";
1606: } else if (e instanceof BindException) {
1607: errorMessage = "Unable to establish a connection to the destination host";
1608: } else if (e instanceof NoRouteToHostException) {
1609: errorMessage = "No route to destination host";
1610: } else if (e instanceof UnknownHostException) {
1611: //errorMessage = "IP address of the destination host could not be determined";
1612: try {
1613: detailedErrorMsgMap = unknownHostHandling(conProp);
1614: httpStatusText = "Unknown Host";
1615: detailedErrorMsg = true;
1616: detailedErrorMsgFile = "proxymsg/unknownHost.inc";
1617: } catch (Exception e1) {
1618: errorMessage = "IP address of the destination host could not be determined";
1619: }
1620: } else if (e instanceof SocketTimeoutException) {
1621: errorMessage = "Unable to establish a connection to the destination host. Connect timed out.";
1622: } else {
1623: String exceptionMsg = e.getMessage();
1624: if ((exceptionMsg != null)
1625: && (exceptionMsg
1626: .indexOf("Corrupt GZIP trailer") >= 0)) {
1627: // just do nothing, we leave it this way
1628: theLogger
1629: .logFine("ignoring bad gzip trail for URL "
1630: + url + " (" + e.getMessage() + ")");
1631: forceConnectionClose(conProp);
1632: } else if ((exceptionMsg != null)
1633: && (exceptionMsg.indexOf("Connection reset") >= 0)) {
1634: errorMessage = "Connection reset";
1635: } else if ((exceptionMsg != null)
1636: && (exceptionMsg.indexOf("unknown host") >= 0)) {
1637: try {
1638: detailedErrorMsgMap = unknownHostHandling(conProp);
1639: httpStatusText = "Unknown Host";
1640: detailedErrorMsg = true;
1641: detailedErrorMsgFile = "proxymsg/unknownHost.inc";
1642: } catch (Exception e1) {
1643: errorMessage = "IP address of the destination host could not be determined";
1644: }
1645: } else if ((exceptionMsg != null)
1646: && ((exceptionMsg.indexOf("socket write error") >= 0)
1647: || (exceptionMsg
1648: .indexOf("Read timed out") >= 0)
1649: || (exceptionMsg.indexOf("Broken pipe") >= 0) || (exceptionMsg
1650: .indexOf("server has closed connection") >= 0))) {
1651: errorMessage = exceptionMsg;
1652: } else if ((remote != null) && (remote.isClosed())) {
1653: // TODO: query for broken pipe
1654: errorMessage = "Destination host unexpectedly closed connection";
1655: } else {
1656: errorMessage = "Unexpected Error. "
1657: + e.getClass().getName() + ": "
1658: + e.getMessage();
1659: unknownError = true;
1660: errorExc = e;
1661: }
1662: }
1663:
1664: // sending back an error message to the client
1665: if (!conProp
1666: .containsKey(httpHeader.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
1667: if (detailedErrorMsg) {
1668: httpd.sendRespondError(conProp, respond,
1669: httpStatusCode, httpStatusText, new File(
1670: detailedErrorMsgFile),
1671: detailedErrorMsgMap, errorExc);
1672: } else {
1673: httpd.sendRespondError(conProp, respond, 4,
1674: httpStatusCode, httpStatusText,
1675: errorMessage, errorExc);
1676: }
1677: } else {
1678: if (unknownError) {
1679: theLogger
1680: .logFine(
1681: "Error while processing request '"
1682: + conProp
1683: .getProperty(
1684: httpHeader.CONNECTION_PROP_REQUESTLINE,
1685: "unknown")
1686: + "':"
1687: + "\n"
1688: + Thread.currentThread()
1689: .getName() + "\n"
1690: + errorMessage, e);
1691: } else {
1692: theLogger
1693: .logFine("Error while processing request '"
1694: + conProp
1695: .getProperty(
1696: httpHeader.CONNECTION_PROP_REQUESTLINE,
1697: "unknown") + "':"
1698: + "\n"
1699: + Thread.currentThread().getName()
1700: + "\n" + errorMessage);
1701: }
1702: forceConnectionClose(conProp);
1703: }
1704: } catch (Exception ee) {
1705: forceConnectionClose(conProp);
1706: }
1707:
1708: }
1709:
1710: private static serverObjects unknownHostHandling(Properties conProp)
1711: throws Exception {
1712: serverObjects detailedErrorMsgMap = new serverObjects();
1713:
1714: // generic toplevel domains
1715: HashSet<String> topLevelDomains = new HashSet<String>(Arrays
1716: .asList(new String[] { "aero", // Fluggesellschaften/Luftfahrt
1717: "arpa", // Einrichtung des ARPANet
1718: "biz", // Business
1719: "com", // Commercial
1720: "coop", // genossenschaftliche Unternehmen
1721: "edu", // Education
1722: "gov", // Government
1723: "info", // Informationsangebote
1724: "int", // International
1725: "jobs", // Jobangebote von Unternemen
1726: "mil", // Military (US-Militaer)
1727: // "museum", // Museen
1728: "name", // Privatpersonen
1729: "nato", // NATO (veraltet)
1730: "net", // Net (Netzwerkbetreiber)
1731: "org", // Organization (Nichtkommerzielle Organisation)
1732: "pro", // Professionals
1733: "travel", // Touristikindustrie
1734:
1735: // some country tlds
1736: "de", "at", "ch", "it", "uk" }));
1737:
1738: // getting some connection properties
1739: String orgHostPort = "80";
1740: String orgHostName = conProp.getProperty(
1741: httpHeader.CONNECTION_PROP_HOST, "unknown")
1742: .toLowerCase();
1743: int pos = orgHostName.indexOf(":");
1744: if (pos != -1) {
1745: orgHostPort = orgHostName.substring(pos + 1);
1746: orgHostName = orgHostName.substring(0, pos);
1747: }
1748: String orgHostPath = conProp.getProperty(
1749: httpHeader.CONNECTION_PROP_PATH, "");
1750: String orgHostArgs = conProp.getProperty(
1751: httpHeader.CONNECTION_PROP_ARGS, "");
1752: if (orgHostArgs.length() > 0)
1753: orgHostArgs = "?" + orgHostArgs;
1754: detailedErrorMsgMap.put("hostName", orgHostName);
1755:
1756: // guessing hostnames
1757: HashSet<String> testHostNames = new HashSet<String>();
1758: String testHostName = null;
1759: if (!orgHostName.startsWith("www.")) {
1760: testHostName = "www." + orgHostName;
1761: InetAddress addr = serverDomains.dnsResolve(testHostName);
1762: if (addr != null)
1763: testHostNames.add(testHostName);
1764: } else if (orgHostName.startsWith("www.")) {
1765: testHostName = orgHostName.substring(4);
1766: InetAddress addr = serverDomains.dnsResolve(testHostName);
1767: if (addr != null)
1768: if (addr != null)
1769: testHostNames.add(testHostName);
1770: }
1771: if (orgHostName.length() > 4 && orgHostName.startsWith("www")
1772: && (orgHostName.charAt(3) != '.')) {
1773: testHostName = orgHostName.substring(0, 3) + "."
1774: + orgHostName.substring(3);
1775: InetAddress addr = serverDomains.dnsResolve(testHostName);
1776: if (addr != null)
1777: if (addr != null)
1778: testHostNames.add(testHostName);
1779: }
1780:
1781: pos = orgHostName.lastIndexOf(".");
1782: if (pos != -1) {
1783: Iterator<String> iter = topLevelDomains.iterator();
1784: while (iter.hasNext()) {
1785: String topLevelDomain = iter.next();
1786: testHostName = orgHostName.substring(0, pos) + "."
1787: + topLevelDomain;
1788: InetAddress addr = serverDomains
1789: .dnsResolve(testHostName);
1790: if (addr != null)
1791: if (addr != null)
1792: testHostNames.add(testHostName);
1793: }
1794: }
1795:
1796: int hostNameCount = 0;
1797: Iterator<String> iter = testHostNames.iterator();
1798: while (iter.hasNext()) {
1799: testHostName = iter.next();
1800: detailedErrorMsgMap.put("list_" + hostNameCount
1801: + "_hostName", testHostName);
1802: detailedErrorMsgMap.put("list_" + hostNameCount
1803: + "_hostPort", orgHostPort);
1804: detailedErrorMsgMap.put("list_" + hostNameCount
1805: + "_hostPath", orgHostPath);
1806: detailedErrorMsgMap.put("list_" + hostNameCount
1807: + "_hostArgs", orgHostArgs);
1808: hostNameCount++;
1809: }
1810:
1811: detailedErrorMsgMap.put("list", hostNameCount);
1812:
1813: if (hostNameCount != 0) {
1814: detailedErrorMsgMap.put("showList", 1);
1815: } else {
1816: detailedErrorMsgMap.put("showList", 0);
1817: }
1818:
1819: return detailedErrorMsgMap;
1820: }
1821:
1822: private static String generateUserAgent(httpHeader requestHeaders) {
1823: userAgentStr.setLength(0);
1824:
1825: String browserUserAgent = (String) requestHeaders.get(
1826: httpHeader.USER_AGENT, proxyUserAgent);
1827: int pos = browserUserAgent.lastIndexOf(')');
1828: if (pos >= 0) {
1829: userAgentStr.append(browserUserAgent.substring(0, pos))
1830: .append("; YaCy ").append(
1831: switchboard.getConfig("vString", "0.1"))
1832: .append("; yacy.net").append(
1833: browserUserAgent.substring(pos));
1834: } else {
1835: userAgentStr.append(browserUserAgent);
1836: }
1837:
1838: return new String(userAgentStr);
1839: }
1840:
1841: private static void setViaHeader(httpHeader header, String httpVer) {
1842: if (!switchboard.getConfigBool("proxy.sendViaHeader", true))
1843: return;
1844:
1845: // getting header set by other proxies in the chain
1846: StringBuffer viaValue = new StringBuffer();
1847: if (header.containsKey(httpHeader.VIA))
1848: viaValue.append((String) header.get(httpHeader.VIA));
1849: if (viaValue.length() > 0)
1850: viaValue.append(", ");
1851:
1852: // appending info about this peer
1853: viaValue.append(httpVer).append(" ").append(
1854: yacyCore.seedDB.mySeed().getName()).append(".yacy ")
1855: .append("(YaCy ").append(
1856: switchboard.getConfig("vString", "0.0"))
1857: .append(")");
1858:
1859: // storing header back
1860: header.put(httpHeader.VIA, new String(viaValue));
1861: }
1862:
1863: /**
1864: * This function is used to generate a logging message according to the
1865: * <a href="http://www.squid-cache.org/Doc/FAQ/FAQ-6.html">squid logging format</a>.<p>
1866: * e.g.<br>
1867: * <code>1117528623.857 178 192.168.1.201 TCP_MISS/200 1069 GET http://www.yacy.de/ - DIRECT/81.169.145.74 text/html</code>
1868: */
1869: private final static void logProxyAccess(Properties conProp) {
1870:
1871: if (!doAccessLogging)
1872: return;
1873:
1874: logMessage.setLength(0);
1875:
1876: // Timestamp
1877: String currentTimestamp = Long.toString(System
1878: .currentTimeMillis());
1879: int offset = currentTimestamp.length() - 3;
1880:
1881: logMessage.append(currentTimestamp.substring(0, offset));
1882: logMessage.append('.');
1883: logMessage.append(currentTimestamp.substring(offset));
1884: logMessage.append(' ');
1885:
1886: // Elapsed time
1887: Long requestStart = (Long) conProp
1888: .get(httpHeader.CONNECTION_PROP_REQUEST_START);
1889: Long requestEnd = (Long) conProp
1890: .get(httpHeader.CONNECTION_PROP_REQUEST_END);
1891: String elapsed = Long.toString(requestEnd.longValue()
1892: - requestStart.longValue());
1893:
1894: for (int i = 0; i < 6 - elapsed.length(); i++)
1895: logMessage.append(' ');
1896: logMessage.append(elapsed);
1897: logMessage.append(' ');
1898:
1899: // Remote Host
1900: String clientIP = conProp
1901: .getProperty(httpHeader.CONNECTION_PROP_CLIENTIP);
1902: logMessage.append(clientIP);
1903: logMessage.append(' ');
1904:
1905: // Code/Status
1906: String respondStatus = conProp
1907: .getProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_STATUS);
1908: String respondCode = conProp.getProperty(
1909: httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,
1910: "UNKNOWN");
1911: logMessage.append(respondCode);
1912: logMessage.append("/");
1913: logMessage.append(respondStatus);
1914: logMessage.append(' ');
1915:
1916: // Bytes
1917: Long bytes = (Long) conProp
1918: .get(httpHeader.CONNECTION_PROP_PROXY_RESPOND_SIZE);
1919: logMessage.append(bytes.toString());
1920: logMessage.append(' ');
1921:
1922: // Method
1923: String requestMethod = conProp
1924: .getProperty(httpHeader.CONNECTION_PROP_METHOD);
1925: logMessage.append(requestMethod);
1926: logMessage.append(' ');
1927:
1928: // URL
1929: String requestURL = conProp
1930: .getProperty(httpHeader.CONNECTION_PROP_URL);
1931: String requestArgs = conProp
1932: .getProperty(httpHeader.CONNECTION_PROP_ARGS);
1933: logMessage.append(requestURL);
1934: if (requestArgs != null) {
1935: logMessage.append("?").append(requestArgs);
1936: }
1937: logMessage.append(' ');
1938:
1939: // Rfc931
1940: logMessage.append("-");
1941: logMessage.append(' ');
1942:
1943: // Peerstatus/Peerhost
1944: String host = conProp
1945: .getProperty(httpHeader.CONNECTION_PROP_HOST);
1946: logMessage.append("DIRECT/");
1947: logMessage.append(host);
1948: logMessage.append(' ');
1949:
1950: // Type
1951: String mime = "-";
1952: if (conProp
1953: .containsKey(httpHeader.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
1954: httpHeader proxyRespondHeader = (httpHeader) conProp
1955: .get(httpHeader.CONNECTION_PROP_PROXY_RESPOND_HEADER);
1956: mime = proxyRespondHeader.mime();
1957: if (mime.indexOf(";") != -1) {
1958: mime = mime.substring(0, mime.indexOf(";"));
1959: }
1960: }
1961: logMessage.append(mime);
1962:
1963: // sending the logging message to the logger
1964: proxyLog.logFine(new String(logMessage));
1965: }
1966:
1967: }
1968:
1969: /*
1970: proxy test:
1971:
1972: http://www.chipchapin.com/WebTools/cookietest.php?
1973: http://xlists.aza.org/moderator/cookietest/cookietest1.php
1974: http://vancouver-webpages.com/proxy/cache-test.html
1975:
1976: */
|