001: // plasmaURLPattern.java
002: // -----------------------
003: // part of YaCy
004: // (C) by Michael Peter Christen; mc@anomic.de
005: // first published on http://www.anomic.de
006: // Frankfurt, Germany, 2005
007: // last major change: 11.07.2005
008: //
009: // $LastChangedDate: 2008-01-23 23:08:32 +0000 (Mi, 23 Jan 2008) $
010: // $LastChangedRevision: 4382 $
011: // $LastChangedBy: orbiter $
012: //
013: // This program is free software; you can redistribute it and/or modify
014: // it under the terms of the GNU General Public License as published by
015: // the Free Software Foundation; either version 2 of the License, or
016: // (at your option) any later version.
017: //
018: // This program is distributed in the hope that it will be useful,
019: // but WITHOUT ANY WARRANTY; without even the implied warranty of
020: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
021: // GNU General Public License for more details.
022: //
023: // You should have received a copy of the GNU General Public License
024: // along with this program; if not, write to the Free Software
025: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
026: //
027: // Using this software in any meaning (reading, learning, copying, compiling,
028: // running) means that you agree that the Author(s) is (are) not responsible
029: // for cost, loss of data or any harm that may be caused directly or indirectly
030: // by usage of this softare or this documentation. The usage of this software
031: // is on your own risk. The installation and usage (starting/running) of this
032: // software may allow other people or application to access your computer and
033: // any attached devices and is highly dependent on the configuration of the
034: // software which must be done by the user of the software; the author(s) is
035: // (are) also not responsible for proper configuration and usage of the
036: // software, even if provoked by documentation provided together with
037: // the software.
038: //
039: // Any changes to this file according to the GPL as documented in the file
040: // gpl.txt aside this file in the shipment you received can be done to the
041: // lines that follows this copyright notice here, but changes must not be
042: // done inside the copyright notive above. A re-distribution must contain
043: // the intact and unchanged copyright notice.
044: // Contributions and changes to the program code must be marked as such.
045:
046: package de.anomic.plasma.urlPattern;
047:
048: import java.io.File;
049: import java.util.ArrayList;
050: import java.util.HashMap;
051:
052: public class defaultURLPattern extends abstractURLPattern implements
053: plasmaURLPattern {
054:
055: public defaultURLPattern(File rootPath) {
056: super (rootPath);
057: }
058:
059: public String getEngineInfo() {
060: return "Default YaCy Blacklist Engine";
061: }
062:
063: public boolean isListed(String blacklistType, String hostlow,
064: String path) {
065: if (hostlow == null)
066: throw new NullPointerException();
067: if (path == null)
068: throw new NullPointerException();
069:
070: // getting the proper blacklist
071: HashMap<String, ArrayList<String>> blacklistMap = super
072: .getBlacklistMap(blacklistType);
073:
074: if (path.length() > 0 && path.charAt(0) == '/')
075: path = path.substring(1);
076: ArrayList<String> app;
077: boolean matched = false;
078: String pp = ""; // path-pattern
079:
080: // first try to match the domain with wildcard '*'
081: // [TL] While "." are found within the string
082: int index = 0;
083: while (!matched
084: && (index = hostlow.indexOf('.', index + 1)) != -1) {
085: if ((app = blacklistMap.get(hostlow.substring(0, index + 1)
086: + "*")) != null) {
087: for (int i = app.size() - 1; !matched && i > -1; i--) {
088: pp = (String) app.get(i);
089: matched |= ((pp.equals("*")) || (path.matches(pp)));
090: }
091: }
092: }
093: index = hostlow.length();
094: while (!matched
095: && (index = hostlow.lastIndexOf('.', index - 1)) != -1) {
096: if ((app = blacklistMap.get("*"
097: + hostlow.substring(index, hostlow.length()))) != null) {
098: for (int i = app.size() - 1; !matched && i > -1; i--) {
099: pp = (String) app.get(i);
100: matched |= ((pp.equals("*")) || (path.matches(pp)));
101: }
102: }
103: }
104:
105: // try to match without wildcard in domain
106: if (!matched && (app = blacklistMap.get(hostlow)) != null) {
107: for (int i = app.size() - 1; !matched && i > -1; i--) {
108: pp = (String) app.get(i);
109: matched |= ((pp.equals("*")) || (path.matches(pp)));
110: }
111: }
112:
113: return matched;
114: }
115: }
|