001: /*
002: JSPWiki - a JSP-based WikiWiki clone.
003:
004: Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
005:
006: This program is free software; you can redistribute it and/or modify
007: it under the terms of the GNU Lesser General Public License as published by
008: the Free Software Foundation; either version 2.1 of the License, or
009: (at your option) any later version.
010:
011: This program is distributed in the hope that it will be useful,
012: but WITHOUT ANY WARRANTY; without even the implied warranty of
013: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: GNU Lesser General Public License for more details.
015:
016: You should have received a copy of the GNU Lesser General Public License
017: along with this program; if not, write to the Free Software
018: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
019: */
020: package com.ecyrd.jspwiki.plugin;
021:
022: import com.ecyrd.jspwiki.*;
023: import org.apache.log4j.Logger;
024: import org.apache.oro.text.*;
025: import org.apache.oro.text.regex.*;
026:
027: import java.util.*;
028: import java.io.InputStream;
029: import java.io.IOException;
030:
031: import javax.servlet.http.HttpServletRequest;
032:
033: /**
034: * Denounces a link by removing it from any search engine. The bots are listed
035: * in com/ecyrd/jspwiki/plugin/denounce.properties.
036: *
037: * @author Janne Jalkanen
038: * @since 2.1.40.
039: */
040: public class Denounce implements WikiPlugin {
041: private static Logger log = Logger.getLogger(Denounce.class);
042:
043: public static final String PARAM_LINK = "link";
044: public static final String PARAM_TEXT = "text";
045:
046: public static final String PROPERTYFILE = "com/ecyrd/jspwiki/plugin/denounce.properties";
047: public static final String PROP_AGENTPATTERN = "denounce.agentpattern.";
048: public static final String PROP_HOSTPATTERN = "denounce.hostpattern.";
049: public static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
050:
051: public static final String PROP_DENOUNCETEXT = "denounce.denouncetext";
052:
053: private static ArrayList c_refererPatterns = new ArrayList();
054: private static ArrayList c_agentPatterns = new ArrayList();
055: private static ArrayList c_hostPatterns = new ArrayList();
056:
057: private static String c_denounceText = "";
058:
059: /**
060: * Prepares the different patterns for later use. Compiling is
061: * (probably) expensive, so we do it statically at class load time.
062: */
063: static {
064: try {
065: PatternCompiler compiler = new GlobCompiler();
066: ClassLoader loader = Denounce.class.getClassLoader();
067:
068: InputStream in = loader.getResourceAsStream(PROPERTYFILE);
069:
070: if (in == null) {
071: throw new IOException(
072: "No property file found! (Check the installation, it should be there.)");
073: }
074:
075: Properties props = new Properties();
076: props.load(in);
077:
078: c_denounceText = props.getProperty(PROP_DENOUNCETEXT,
079: c_denounceText);
080:
081: for (Enumeration e = props.propertyNames(); e
082: .hasMoreElements();) {
083: String name = (String) e.nextElement();
084:
085: try {
086: if (name.startsWith(PROP_REFERERPATTERN)) {
087: c_refererPatterns.add(compiler.compile(props
088: .getProperty(name)));
089: } else if (name.startsWith(PROP_AGENTPATTERN)) {
090: c_agentPatterns.add(compiler.compile(props
091: .getProperty(name)));
092: } else if (name.startsWith(PROP_HOSTPATTERN)) {
093: c_hostPatterns.add(compiler.compile(props
094: .getProperty(name)));
095: }
096: } catch (MalformedPatternException ex) {
097: log.error("Malformed URL pattern in "
098: + PROPERTYFILE + ": "
099: + props.getProperty(name), ex);
100: }
101: }
102:
103: log.debug("Added " + c_refererPatterns.size()
104: + c_agentPatterns.size() + c_hostPatterns.size()
105: + " crawlers to denounce list.");
106: } catch (IOException e) {
107: log.error("Unable to load URL patterns from "
108: + PROPERTYFILE, e);
109: } catch (Exception e) {
110: log.error("Unable to initialize Denounce plugin", e);
111: }
112: }
113:
114: public String execute(WikiContext context, Map params)
115: throws PluginException {
116: String link = (String) params.get(PARAM_LINK);
117: String text = (String) params.get(PARAM_TEXT);
118: boolean linkAllowed = true;
119:
120: if (link == null) {
121: throw new PluginException("Denounce: No parameter "
122: + PARAM_LINK + " defined!");
123: }
124:
125: HttpServletRequest request = context.getHttpRequest();
126:
127: if (request != null) {
128: linkAllowed = !matchHeaders(request);
129: }
130:
131: if (text == null)
132: text = link;
133:
134: if (linkAllowed) {
135: // FIXME: Should really call TranslatorReader
136: return "<a href=\"" + link + "\">" + text + "</a>";
137: }
138:
139: return c_denounceText;
140: }
141:
142: /**
143: * Returns true, if the path is found among the referers.
144: */
145: private boolean matchPattern(List list, String path) {
146: PatternMatcher matcher = new Perl5Matcher();
147:
148: for (Iterator i = list.iterator(); i.hasNext();) {
149: if (matcher.matches(path, (Pattern) i.next())) {
150: return true;
151: }
152: }
153:
154: return false;
155: }
156:
157: // FIXME: Should really return immediately when a match is found.
158:
159: private boolean matchHeaders(HttpServletRequest request) {
160: //
161: // User Agent
162: //
163:
164: String userAgent = request.getHeader("User-Agent");
165:
166: if (userAgent != null
167: && matchPattern(c_agentPatterns, userAgent)) {
168: log.debug("Matched user agent " + userAgent
169: + " for denounce.");
170: return true;
171: }
172:
173: //
174: // Referrer header
175: //
176:
177: String refererPath = request.getHeader("Referer");
178:
179: if (refererPath != null
180: && matchPattern(c_refererPatterns, refererPath)) {
181: log.debug("Matched referer " + refererPath
182: + " for denounce.");
183: return true;
184: }
185:
186: //
187: // Host
188: //
189:
190: String host = request.getRemoteHost();
191:
192: if (host != null && matchPattern(c_hostPatterns, host)) {
193: log.debug("Matched host " + host + " for denounce.");
194: return true;
195: }
196:
197: return false;
198: }
199: }
|