01: package net.jforum.util.legacy.clickstream;
02:
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
05:
06: import javax.servlet.http.HttpServletRequest;
07:
08: import net.jforum.util.legacy.clickstream.config.ClickstreamConfig;
09: import net.jforum.util.legacy.clickstream.config.ConfigLoader;
10:
11: /**
12: * Determines if a request is actually a bot or spider.
13: *
14: * @author <a href="plightbo@hotmail.com">Patrick Lightbody</a>
15: * @author Rafael Steil (little hacks for JForum)
16: * @version $Id: BotChecker.java,v 1.6 2005/12/18 02:12:54 rafaelsteil Exp $
17: */
18: public class BotChecker {
19: /**
20: * Checks if we have a bot
21: * @param request the request
22: * @return <code>null</code> if there is no bots in the current request,
23: * or the bot's name otherwise
24: */
25: public static String isBot(HttpServletRequest request) {
26: if (request.getRequestURI().indexOf("robots.txt") != -1) {
27: // there is a specific request for the robots.txt file, so we assume
28: // it must be a robot (only robots request robots.txt)
29: return "Unknown (asked for robots.txt)";
30: }
31:
32: String userAgent = request.getHeader("User-Agent");
33:
34: ClickstreamConfig config = ConfigLoader.instance().getConfig();
35:
36: if (userAgent != null && config != null) {
37: List agents = config.getBotAgents();
38:
39: userAgent = userAgent.toLowerCase();
40:
41: for (Iterator iterator = agents.iterator(); iterator
42: .hasNext();) {
43: String agent = (String) iterator.next();
44:
45: if (agent == null) {
46: continue;
47: }
48:
49: if (userAgent.indexOf(agent) != -1) {
50: return userAgent;
51: }
52: }
53: }
54:
55: String remoteHost = request.getRemoteHost(); // requires a DNS lookup
56:
57: if (remoteHost != null && remoteHost.length() > 0
58: && remoteHost.charAt(remoteHost.length() - 1) > 64) {
59: List hosts = config.getBotHosts();
60:
61: remoteHost = remoteHost.toLowerCase();
62:
63: for (Iterator iterator = hosts.iterator(); iterator
64: .hasNext();) {
65: String host = (String) iterator.next();
66:
67: if (host == null) {
68: continue;
69: }
70:
71: if (remoteHost.indexOf(host) != -1) {
72: return remoteHost;
73: }
74: }
75: }
76:
77: return null;
78: }
79: }
|