01: /*
02: * regain - A file search engine providing plenty of formats
03: * Copyright (C) 2004 Til Schneider
04: *
05: * This library is free software; you can redistribute it and/or
06: * modify it under the terms of the GNU Lesser General Public
07: * License as published by the Free Software Foundation; either
08: * version 2.1 of the License, or (at your option) any later version.
09: *
10: * This library is distributed in the hope that it will be useful,
11: * but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Lesser General Public License for more details.
14: *
15: * You should have received a copy of the GNU Lesser General Public
16: * License along with this library; if not, write to the Free Software
17: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18: *
19: * Contact: Til Schneider, info@murfman.de
20: *
21: * CVS information:
22: * $RCSfile$
23: * $Source$
24: * $Date: 2005-05-11 11:21:40 +0200 (Mi, 11 Mai 2005) $
25: * $Author: til132 $
26: * $Revision: 134 $
27: */
28: package net.sf.regain.crawler.config;
29:
30: import net.sf.regain.RegainException;
31:
32: import org.apache.regexp.RE;
33: import org.apache.regexp.RESyntaxException;
34:
35: /**
36: * An UrlMatcher that matches URLs that match to a regular expression.
37: *
38: * @author Tilman Schneider, STZ-IDA an der FH Karlsruhe
39: */
40: public class RegexUrlMatcher implements UrlMatcher {
41:
42: /** The regex as String. */
43: private String mUrlRegexAsString;
44:
45: /** The regex a URL must match to in order to be matched by this matcher. */
46: private RE mUrlRegex;
47:
48: /**
49: * Creates a new instance of RegexUrlMatcher.
50: *
51: * @param regex The regular expression a URL must match to in order to be
52: * matched by this matcher.
53: * @throws RegainException
54: */
55: public RegexUrlMatcher(String regex) throws RegainException {
56: mUrlRegexAsString = regex;
57:
58: try {
59: mUrlRegex = new RE(regex);
60: } catch (RESyntaxException exc) {
61: throw new RegainException(
62: "Regular expression of URL matcher has a "
63: + "wrong syntax: '" + regex + "'", exc);
64: }
65: }
66:
67: /**
68: * Checks whether a URL matches to the rules of this matcher.
69: *
70: * @param url The URL to check.
71: * @return Whether the given URL matches to the rules of this matcher.
72: */
73: public boolean matches(String url) {
74: return mUrlRegex.match(url);
75: }
76:
77: /**
78: * Gets a String representation of this UrlMatcher.
79: */
80: public String toString() {
81: return mUrlRegexAsString;
82: }
83:
84: }
|