001: package de.anomic.urlRedirector;
002:
003: import java.io.BufferedReader;
004: import java.io.IOException;
005: import java.io.InputStreamReader;
006: import java.io.PrintWriter;
007: import java.net.MalformedURLException;
008: import java.util.Date;
009:
010: import de.anomic.data.userDB;
011: import de.anomic.http.httpHeader;
012: import de.anomic.http.httpc;
013: import de.anomic.plasma.plasmaCrawlProfile;
014: import de.anomic.plasma.plasmaParser;
015: import de.anomic.plasma.plasmaSwitchboard;
016: import de.anomic.server.serverCore;
017: import de.anomic.server.serverHandler;
018: import de.anomic.server.logging.serverLog;
019: import de.anomic.server.serverCore.Session;
020: import de.anomic.yacy.yacyCore;
021: import de.anomic.yacy.yacyURL;
022:
023: public class urlRedirectord implements serverHandler {
024:
025: private serverCore.Session session;
026: private static plasmaSwitchboard switchboard = null;
027: private serverLog theLogger = new serverLog("URL-REDIRECTOR");
028: private static plasmaCrawlProfile.entry profile = null;
029: private String nextURL;
030:
031: public urlRedirectord() {
032: if (switchboard == null) {
033: switchboard = plasmaSwitchboard.getSwitchboard();
034: }
035:
036: if (profile == null) {
037: profile = switchboard.profilesActiveCrawls.newEntry(
038: // name
039: "URL Redirector",
040: // start URL
041: null,
042: // crawling filter
043: ".*", ".*",
044: // depth
045: 0, 0,
046: // recrawlIfOlder (minutes), if negative: do not re-crawl
047: -1,
048: // domFilterDepth, if negative: no auto-filter
049: -1,
050: // domMaxPages, if negative: no count restriction
051: -1,
052: // crawlDynamic
053: false,
054: // indexText
055: true,
056: // indexMedia
057: true,
058: // storeHTCache
059: false,
060: // storeTxCache
061: true,
062: // remoteIndexing
063: false,
064: // xsstopw
065: true,
066: // xdstopw
067: true,
068: // xpstopw
069: true);
070: }
071: }
072:
073: public String getURL() {
074: return this .nextURL;
075: }
076:
077: public void initSession(Session theSession) {
078: // getting current session
079: this .session = theSession;
080: }
081:
082: public String greeting() {
083: return null;
084: }
085:
086: public String error(Throwable e) {
087: return null;
088: }
089:
090: public Object clone() {
091: return null;
092: }
093:
094: public void reset() {
095: this .session = null;
096: }
097:
098: public Boolean EMPTY(String arg) throws IOException {
099: return null;
100: }
101:
102: public Boolean UNKNOWN(String requestLine) throws IOException {
103: return null;
104: }
105:
106: public Boolean REDIRECTOR(String requestLine) {
107: try {
108:
109: boolean authenticated = false;
110: String userName = null;
111: String md5Pwd = null;
112:
113: // setting timeout
114: this .session.controlSocket.setSoTimeout(0);
115:
116: String line = null;
117: BufferedReader inputReader = new BufferedReader(
118: new InputStreamReader(this .session.in));
119: PrintWriter outputWriter = new PrintWriter(this .session.out);
120:
121: while ((line = inputReader.readLine()) != null) {
122: if (line.equals("EXIT")) {
123: break;
124: } else if (line.startsWith("#")) {
125: outputWriter.print("\r\n");
126: outputWriter.flush();
127: continue;
128: } else if (line.startsWith("USER")) {
129: userName = line.substring(line.indexOf(" ")).trim();
130: } else if (line.startsWith("PWD")) {
131: if (userName != null) {
132: userDB.Entry userEntry = switchboard.userDB
133: .getEntry(userName);
134: if (userEntry != null) {
135: md5Pwd = line.substring(line.indexOf(" "))
136: .trim();
137: if (userEntry.getMD5EncodedUserPwd()
138: .equals(md5Pwd)) {
139: authenticated = true;
140: }
141: }
142: }
143: } else if (line.startsWith("MEDIAEXT")) {
144: String transferIgnoreList = plasmaParser
145: .getMediaExtList();
146: transferIgnoreList = transferIgnoreList.substring(
147: 1, transferIgnoreList.length() - 1);
148:
149: outputWriter.print(transferIgnoreList);
150: outputWriter.print("\r\n");
151: outputWriter.flush();
152: } else if (line.startsWith("DEPTH")) {
153: int pos = line.indexOf(" ");
154: if (pos != -1) {
155: String newDepth = line.substring(pos).trim();
156: this .theLogger
157: .logFine("Changing crawling depth to '"
158: + newDepth + "'.");
159: switchboard.profilesActiveCrawls.changeEntry(
160: profile, "generalDepth", newDepth);
161: }
162: outputWriter.print("\r\n");
163: outputWriter.flush();
164: } else if (line.startsWith("CRAWLDYNAMIC")) {
165: int pos = line.indexOf(" ");
166: if (pos != -1) {
167: String newValue = line.substring(pos).trim();
168: this .theLogger
169: .logFine("Changing crawl dynamic setting to '"
170: + newValue + "'");
171: switchboard.profilesActiveCrawls.changeEntry(
172: profile, "crawlingQ", newValue);
173: }
174: outputWriter.print("\r\n");
175: outputWriter.flush();
176: } else {
177: if (!authenticated) {
178: return Boolean.FALSE;
179: }
180:
181: int pos = line.indexOf(" ");
182: this .nextURL = (pos != -1) ? line.substring(0, pos)
183: : line;
184:
185: this .theLogger.logFine("Receiving request " + line);
186: outputWriter.print("\r\n");
187: outputWriter.flush();
188:
189: String reasonString = null;
190: try {
191: // generating URL Object
192: yacyURL reqURL = new yacyURL(this .nextURL, null);
193:
194: // getting URL mimeType
195: httpHeader header = httpc.whead(reqURL, reqURL
196: .getHost(), 10000, null, null,
197: switchboard.remoteProxyConfig);
198:
199: if (plasmaParser.supportedContent(
200: plasmaParser.PARSER_MODE_URLREDIRECTOR,
201: reqURL, header.mime())) {
202: // first delete old entry, if exists
203: String urlhash = reqURL.hash();
204: switchboard.wordIndex.loadedURL
205: .remove(urlhash);
206: switchboard.crawlQueues.noticeURL
207: .removeByURLHash(urlhash);
208: switchboard.crawlQueues.errorURL
209: .remove(urlhash);
210:
211: // enqueuing URL for crawling
212: reasonString = switchboard.crawlStacker
213: .stackCrawl(
214: reqURL,
215: null,
216: yacyCore.seedDB.mySeed().hash,
217: "URL Redirector",
218: new Date(), 0, profile);
219: } else {
220: reasonString = "Unsupporte file extension";
221: }
222: } catch (MalformedURLException badUrlEx) {
223: reasonString = "Malformed URL";
224: }
225:
226: if (reasonString != null) {
227: this .theLogger.logFine("URL " + nextURL
228: + " rejected. Reason: " + reasonString);
229: }
230: nextURL = null;
231: }
232: }
233:
234: this .theLogger.logFine("Connection terminated");
235:
236: // Terminating connection
237: return serverCore.TERMINATE_CONNECTION;
238: } catch (Exception e) {
239: this .theLogger.logSevere("Unexpected Error: "
240: + e.getMessage(), e);
241: return serverCore.TERMINATE_CONNECTION;
242: }
243: }
244:
245: }
|