001: // ProxyDispatcher.java
002: // $Id: ProxyDispatcher.java,v 1.31 2000/08/16 21:38:05 ylafon Exp $
003: // (c) COPYRIGHT MIT and INRIA, 1996.
004: // please first read the full copyright statement in file COPYRIGHT.HTML
005:
006: package org.w3c.www.protocol.http.proxy;
007:
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;

import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;

import org.w3c.util.ObservableProperties;
import org.w3c.util.PropertyMonitoring;

import org.w3c.www.protocol.http.HttpException;
import org.w3c.www.protocol.http.HttpManager;
import org.w3c.www.protocol.http.PropRequestFilter;
import org.w3c.www.protocol.http.Reply;
import org.w3c.www.protocol.http.Request;

import org.w3c.www.http.HttpRequestMessage;
028:
029: /**
030: * The proxy dispatcher applies some <em>rules</em> to a request.
031: * The goal of that filter is to allow special pre-processing of requests
032: * based, on their target host, before sending them off the net.
033: * <p>The filter is configured through a <em>rule file</em> whose format
034: * is described by the following BNF:
035: * <code>
036: * rule-file=(<em>record</em>)*<br>
037: * record=<strong>EOL</strong>|<em>comment</em>|<em>rule</em><br>
038: * comment=<strong>#</strong>(<strong>^EOL</strong>)*<strong>EOL</strong><br>
039: * rule=<em>rule-lhs</em>(<strong>SPACE</strong>)*<em>rule-rhs</em><br>
040: * rule-lhs=(<strong>token</strong>)
041: * |(<strong>token</strong> (<strong>.</strong> <strog>token</strong>)*<br>
042: * rule-lhr=<em>forbid</em>|<em>direct</em>|<em>redirect</em>
043: * |<em>proxy</em>|<em>authorization</em>|<em>proxyauth</em><br>
044: * forbid=<strong>FORBID</strong>|<strong>forbid</strong><br>
045: * direct=<strong>DIRECT</strong>|<strong>direct</strong><br>
046: * redirect=(<strong>REDIRECT</strong>|<strong>proxy</strong>) <em>url</em><br>
047: * proxy=(<strong>PROXY</strong>|<strong>proxy</strong>) <em>url</em><br>
048: * url=<strong>any valid URL</strong></br>
049: * authorization=(<strong>AUTHORIZATION</strong>|<strong>authorization</strong>
050: * <em>user</em> <em>password</em><br>
051: * proxyauth=(<strong>PROXYAUTH</strong>|<strong>proxyauth</strong>
052: * <em>user</em> <em>password</em> <em>url</em><br>
053: * </code>
054: * <p>A sample rule file looks like this:
055: * <code>
056: * # Some comments
057: *
058: * edu proxy http://class.w3.org:8001/
059: * org proxy http://class.w3.org:8001/
060: * fr direct
061: * www.evilsite.com redirect http://www.goodsite.com/warning.html
062: * www.w3.org direct
063: * 138.96.24 direct
064: * www.playboy.com forbid
065: * default proxy http://cache.inria.fr:8080/
066: * </code>
067: * <p>The algorithm used to lookup rules is the following:
068: * <ul>
069: * <li>Split all rules <em>left hand side</em> into its components, eg
070: * H1.H2.H3 is splitted into { H1, H2, H3 }, then reverse the components and
071: * map that to the rule. In our example above, { org, w3, www} would be mapped
072: * to <em>direct</em>.
073: * <li>Split the fully qualified host name into its components, eg, A.B.C is
074: * splitted into { A, B, C } and reverse it.
075: * <li>Find the longest match in the mapping table of rules, and get
076: * apply the given rule.
077: * </ul>
078: * <p>In our example, a request to <strong>www.isi.edu</strong> would match
079: * the <em>edu</em> rule, and a request for <strong>www.w3.org</strong>
080: * would match the <em>direct</em> rule, for example.
081: * <p>Three rules are defined:
082: * <dl>
083: * <dt>direct<dd>Run that request directly against the target host.
084: * <dt>forbid<dd>Emit a forbid message, indicating that the user is not
085: * allowed to contact this host.
086: * <dt>proxy<dd>Run that request through the given <em>proxy</em>.
087: * <dt>proxyauth<dd>Run that request through a proxy with the right proxy
088: * credentials.
089: * </dl>
090: * <p>For numeric IP addresses, the most significant part is the beginning,
091: * so {A, B, C} are deducted directly. In the example { 138, 96, 24 } is mapped
092: * to direct.
093: * <p>If no rules are applied, then the default rule (root rule) is applied.
094: * See the example.
095: */
096:
097: public class ProxyDispatcher implements PropRequestFilter,
098: PropertyMonitoring {
099: /**
100: * Name of the property giving the rule file URL.
101: */
102: public static final String RULE_P = "org.w3c.www.protocol.http.proxy.rules";
103:
104: /**
105: * Name of the property turning that filter in debug mode.
106: */
107: public static final String DEBUG_P = "org.w3c.www.protocol.http.proxy.debug";
108:
109: /**
110: * Name of the property turning that filter in debug mode.
111: */
112: public static final String CHECK_RULES_LAST_MODIFIED_P = "org.w3c.www.protocol.http.proxy.rules.check.lastmodified";
113:
114: /**
115: * The properties we initialized ourself from.
116: */
117: protected ObservableProperties props = null;
118: /**
119: * The current set of rules to apply.
120: */
121: protected RuleNode rules = null;
122: /**
123: * Are we in debug mode ?
124: */
125: protected boolean debug = false;
126:
127: protected boolean check_rules = false;
128:
129: protected static final String disabled = "disabled";
130:
131: protected long lastParsingTime = -1;
132:
133: /**
134: * Parse the given input stream as a rule file.
135: * @param in The input stream to parse.
136: * @exception IOException if an IO error occurs.
137: * @exception RuleParserException if parsing failed.
138: */
139:
140: protected void parseRules(InputStream in) throws IOException,
141: RuleParserException {
142: RuleParser parser = new RuleParser(in);
143: RuleNode nroot = parser.parse();
144: rules = nroot;
145: lastParsingTime = System.currentTimeMillis();
146: }
147:
148: /**
149: * Parse the default set of rules.
150: * <p>IOf the rules cannot be parsed, the filter emits an error
151: * message to standard error, and turn itself into transparent mode.
152: */
153:
154: protected void parseRules() {
155: if (debug)
156: System.out.println("PARSING RULES...");
157: String ruleurl = props.getString(RULE_P, null);
158: InputStream in = null;
159: // Try opening the rule file as a URL:
160: try {
161: URL url = new URL(ruleurl);
162: in = url.openStream();
163: } catch (Exception ex) {
164: // If this fails, it may be just a file name:
165: try {
166: in = (new BufferedInputStream(new FileInputStream(
167: new File(ruleurl))));
168: } catch (Exception nex) {
169: System.err
170: .println("* ProxyDispatcher: unable to open rule "
171: + "file \"" + ruleurl + "\"");
172: rules = null;
173: return;
174: }
175: }
176: // Parse that input stream as a rule file:
177: try {
178: parseRules(in);
179: } catch (Exception ex) {
180: System.err.println("Error parsing rules from: " + ruleurl);
181: ex.printStackTrace();
182: rules = null;
183: } finally {
184: if (in != null) {
185: try {
186: in.close();
187: } catch (IOException ex) {
188: }
189: }
190: }
191: if (debug)
192: System.out.println("DONE.");
193: }
194:
195: protected boolean needsParsing() {
196: if (rules == null)
197: return true;
198: if (!check_rules)
199: return false;
200: long rulesStamp = -1;
201: String ruleurl = props.getString(RULE_P, null);
202: try {
203: URL url = new URL(ruleurl);
204: if (url.getProtocol().equalsIgnoreCase("file")) {
205: File file = new File(url.getFile());
206: rulesStamp = file.lastModified();
207: } else {
208: URLConnection con = url.openConnection();
209: rulesStamp = con.getLastModified();
210: }
211: } catch (Exception ex) {
212: File file = new File(ruleurl);
213: rulesStamp = file.lastModified();
214: }
215: System.out.println("rulesStamp : " + rulesStamp);
216: return (lastParsingTime < rulesStamp);
217: }
218:
219: /**
220: * Filter requests before they are emitted.
221: * Look for a matching rule, and if found apply it before continuing
222: * the process. If a forbid rule was apply, this method will return
223: * with a <em>forbidden</em> message.
224: * @param request The request to filter.
225: * @return A Reply instance, if processing is not to be continued,
226: * <strong>false</strong>otherwise.
227: */
228:
229: public Reply ingoingFilter(Request request) {
230: if (needsParsing())
231: parseRules();
232: if (rules != null) {
233: URL url = request.getURL();
234: String host = url.getHost();
235: Rule rule = rules.lookupRule(host);
236: if (rule != null) {
237: if (debug) {
238: String args = rule.getRuleArgs();
239: if (args == null) {
240: args = "";
241: } else {
242: args = " " + args;
243: }
244: System.out.println("[" + getClass().getName()
245: + "]: applying rule <" + rule.getRuleName()
246: + args + "> to " + request.getURL());
247: }
248: return rule.apply(request);
249: }
250: }
251: return null;
252: }
253:
254: /**
255: * Filter requests when an error occurs during the process.
256: * This filter tries to do a direct connection if it is needed
257: * @param reques The request to filter.
258: * @param reply It's associated reply.
259: * @return Always <strong>null</strong>.
260: */
261:
262: public boolean exceptionFilter(Request request, HttpException ex) {
263: // if it was a proxy connection, try a direct one
264: // add test for exception here
265: if (request.hasProxy()) {
266: Reply reply = null;
267: HttpManager hm = HttpManager.getManager();
268: request.setProxy(null);
269: if (debug)
270: System.out.println("[" + getClass().getName()
271: + "]: direct fetch " + "for "
272: + request.getURL());
273: return true;
274: }
275: return false;
276: }
277:
278: /**
279: * Filter requests after processing.
280: * This filter doesn't do any post-processing.
281: * @param reques The request to filter.
282: * @param reply It's associated reply.
283: * @return Always <strong>null</strong>.
284: */
285:
286: public Reply outgoingFilter(Request request, Reply reply) {
287: return null;
288: }
289:
290: /**
291: * PropertyMonitoring implementation - Commit property changes.
292: * @param name The name of the property that has changed.
293: * @return A boolean <strong>true</strong> if change was commited,
294: * <strong>false</strong> otherwise.
295: */
296:
297: public boolean propertyChanged(String name) {
298: if (name.equals(RULE_P)) {
299: try {
300: parseRules();
301: } catch (Exception ex) {
302: ex.printStackTrace();
303: return false;
304: }
305: } else if (name.equals(DEBUG_P)) {
306: debug = props.getBoolean(DEBUG_P, false);
307: } else if (name.equals(CHECK_RULES_LAST_MODIFIED_P)) {
308: check_rules = props.getBoolean(CHECK_RULES_LAST_MODIFIED_P,
309: false);
310: }
311: return true;
312: }
313:
314: public void initialize(HttpManager manager) {
315: // Prepare empty entry list:
316: props = manager.getProperties();
317: props.registerObserver(this );
318: // Initialize from properties:
319: parseRules();
320: if (debug = props.getBoolean(DEBUG_P, false))
321: System.out.println("[" + getClass().getName()
322: + ": debuging on.");
323: check_rules = props.getBoolean(CHECK_RULES_LAST_MODIFIED_P,
324: false);
325: // Install ourself
326: manager.setFilter(this );
327: }
328:
329: /**
330: * We don't maintain cached infos.
331: */
332:
333: public void sync() {
334: }
335:
336: /**
337: * Empty constructor, for dynamic instantiation.
338: */
339:
340: public ProxyDispatcher() {
341: super();
342: }
343: }
|