001: /*
002: * This program is free software; you can redistribute it and/or
003: * modify it under the terms of the GNU General Public License
004: * as published by the Free Software Foundation; either version 2
005: * of the License, or (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU General Public License for more details.
011:
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
015: */
016: package net.sf.jftp.tools;
017:
018: import net.sf.jftp.*;
019: import net.sf.jftp.config.*;
020: import net.sf.jftp.gui.framework.*;
021: import net.sf.jftp.net.*;
022: import net.sf.jftp.system.LocalIO;
023: import net.sf.jftp.system.logging.Log;
024: import net.sf.jftp.util.*;
025:
026: import java.awt.*;
027: import java.awt.event.*;
028:
029: import java.io.*;
030:
031: import java.net.*;
032:
033: import java.util.*;
034:
035: import javax.swing.*;
036: import javax.swing.event.*;
037:
038: public class HttpSpider extends HPanel implements Runnable,
039: ActionListener {
040: private HTextField host = new HTextField("Full URL:",
041: "http://j-ftp.sourceforge.net/index.html", 30);
042: private HTextField type = new HTextField("Types (use * for all):",
043: "html-htm-css-gif-jpg-zip-gz-avi-mpg", 25);
044: private HTextField depth = new HTextField(
045: "Search up to this many levels deeper:", "1", 10);
046: private HTextField dir = new HTextField("Store files in:", "", 25);
047: private JPanel p1 = new JPanel();
048: private JPanel okP = new JPanel();
049: private JButton ok = new JButton("Start");
050: private int currentDepth = 0;
051: private int MAX = 1;
052: private String[] typeArray = { "mpg", "avi", "mpeg", "mov", "rm",
053: "wmv" };
054: private String localDir = ".";
055: private String[] argv;
056: private Thread runner;
057: private boolean stopflag = false;
058: private JButton stop = new JButton("Stop download (ASAP)");
059:
060: public HttpSpider(String localDir) {
061: this .localDir = localDir;
062:
063: //setSize(440,220);
064: //setLocation(200,250);
065: //setTitle("Http spider...");
066: //getContentPane().
067: setLayout(new BorderLayout());
068:
069: //setBackground(Color.lightGray);
070: p1.setLayout(new GridLayout(4, 1, 5, 5));
071: p1.add(host);
072: p1.add(type);
073: p1.add(depth);
074: dir.setText(localDir);
075: p1.add(dir);
076:
077: //getContentPane().
078: add("Center", p1);
079:
080: //getContentPane().
081: add("South", okP);
082: okP.add(ok);
083: ok.addActionListener(this );
084:
085: setVisible(true);
086: }
087:
088: public void actionPerformed(ActionEvent e) {
089: if (e.getSource() == ok) {
090: //this.dispose();
091: localDir = dir.getText();
092:
093: if (!localDir.endsWith("/")) {
094: localDir = localDir + "/";
095: }
096:
097: String[] argv2 = { host.getText().trim(),
098: type.getText().trim(), depth.getText().trim() };
099: argv = argv2;
100:
101: removeAll();
102: add(
103: "North",
104: new JLabel(
105: "Starting download, please watch the log window for details"));
106: add("Center", stop);
107: stop.addActionListener(this );
108: JFtp.statusP.jftp.setClosable(this .hashCode(), false);
109: validate();
110:
111: runner = new Thread(this );
112: runner.start();
113: } else if (e.getSource() == stop) {
114: stopflag = true;
115: }
116: }
117:
118: public void run() {
119: spider(argv);
120:
121: if (!stopflag) {
122: Log.debug("\nRecursive download finished.\nOuptut dir: "
123: + localDir);
124: } else {
125: Log.debug("\nRecursive download aborted.");
126: }
127:
128: JFtp.statusP.jftp.ensureLogging();
129: JFtp.statusP.jftp.removeFromDesktop(this .hashCode());
130: }
131:
132: private void spider(String[] argv) {
133: try {
134: String url = "http://j-ftp.sourceforge.net/index.html";
135:
136: if (argv.length >= 2) {
137: url = clear(argv[0]);
138:
139: if (url.indexOf("/") < 0) {
140: url = url + "/";
141: }
142:
143: typeArray = check(argv[1]);
144:
145: Log.debugRaw(">>> Scanning for ");
146:
147: for (int i = 0; i < typeArray.length; i++) {
148: Log.debugRaw(typeArray[i] + " ");
149: }
150:
151: Log.debug("");
152: }
153:
154: if (argv.length > 2) {
155: MAX = Integer.parseInt(argv[2]);
156: }
157:
158: //for(int i=0; i<typeArray.length; i++) Log.debug("+ "+typeArray[i]);
159: if (stopflag) {
160: return;
161: }
162:
163: Log.debug("Fetching initial HTML file...");
164:
165: Holer sammy = new Holer(localDir);
166: sammy.bringAnStart(url, true);
167:
168: if (stopflag) {
169: return;
170: }
171:
172: Log.debug("Searching for links...");
173: JFtp.statusP.jftp.ensureLogging();
174: LocalIO.pause(500);
175:
176: if (stopflag) {
177: return;
178: }
179:
180: smoke(url);
181: } catch (Exception ex) {
182: ex.printStackTrace();
183: }
184: }
185:
186: private String clear(String url) {
187: int idx = url.indexOf("http://");
188:
189: if (idx >= 0) {
190: url = url.substring(7);
191: }
192:
193: return url;
194: }
195:
196: private Vector addVector(Vector v, Vector x) {
197: Enumeration e = x.elements();
198:
199: while (e.hasMoreElements()) {
200: String next = (String) e.nextElement();
201: v.add(next);
202: }
203:
204: return v;
205: }
206:
207: private void smoke(String url) throws Exception {
208: if (stopflag) {
209: return;
210: }
211:
212: url = clear(url);
213:
214: Holer sammy = new Holer(localDir);
215: String zeug = sammy.holZeug(url);
216:
217: Vector m = sortiermal(zeug, url.substring(0, url
218: .lastIndexOf("/")), "href=\"");
219: m = addVector(m, sortiermal(zeug, url.substring(0, url
220: .lastIndexOf("/")), "src=\""));
221: m = addVector(m, sortiermal(zeug, url.substring(0, url
222: .lastIndexOf("/")), "HREF=\""));
223: m = addVector(m, sortiermal(zeug, url.substring(0, url
224: .lastIndexOf("/")), "SRC=\""));
225:
226: Enumeration mischen = m.elements();
227:
228: while (mischen.hasMoreElements()) {
229: if (stopflag) {
230: return;
231: }
232:
233: String next = (String) mischen.nextElement();
234:
235: Log.out("Processing: " + next);
236:
237: for (int i = 0; i < typeArray.length; i++) {
238: if (next.endsWith(typeArray[i])
239: || typeArray[i].trim().equals("*")) {
240: int x = next.indexOf("/");
241:
242: if ((x > 0)
243: && (next.substring(0, x).indexOf(".") > 0)) {
244: Holer nochnsammy = new Holer(localDir);
245: nochnsammy.bringAnStart(next, false);
246:
247: if (stopflag) {
248: return;
249: }
250:
251: continue;
252: }
253: }
254: }
255:
256: if (currentDepth < MAX) {
257: if (stopflag) {
258: return;
259: }
260:
261: int x = next.indexOf("/");
262:
263: if ((x > 0) && (next.substring(0, x).indexOf(".") > 0)) {
264: currentDepth++;
265: smoke(next);
266: currentDepth--;
267: }
268: }
269: }
270: }
271:
272: private Vector sortiermal(String zeug, String url, String index) {
273: Vector mischen = new Vector();
274: int wo = 0;
275:
276: while (true) {
277: wo = zeug.indexOf(index);
278:
279: if (wo < 0) {
280: return mischen;
281: }
282:
283: zeug = zeug.substring(wo + index.length());
284:
285: String was = zeug.substring(0, zeug.indexOf("\""));
286:
287: was = checker(was, url);
288: mischen.add(was);
289: Log.out("Added: " + was);
290: }
291: }
292:
293: private String[] check(String auswahl) {
294: StringTokenizer flyer = new StringTokenizer(auswahl, "-", false);
295: String[] einkauf = new String[flyer.countTokens()];
296: int tmp = 0;
297:
298: while (flyer.hasMoreElements()) {
299: einkauf[tmp] = (String) flyer.nextElement();
300: tmp++;
301: }
302:
303: return einkauf;
304: }
305:
306: private String checker(String was, String url) {
307: was = clear(was);
308:
309: if (was.startsWith(url)) {
310: return was;
311: }
312:
313: if (was.startsWith("/") && (url.indexOf("/") > 0)) {
314: was = url.substring(0, url.indexOf("/")) + was;
315: } else if (was.startsWith("/") && (url.indexOf("/") < 0)) {
316: was = url + was;
317: } else if ((was.indexOf(".") > 0)) {
318: int idx = was.indexOf("/");
319: String tmp = "";
320:
321: if (idx >= 0) {
322: tmp = was.substring(0, idx);
323: }
324:
325: if ((tmp.indexOf(".") > 0)) {
326: return clear(was);
327: }
328:
329: if (url.endsWith("/")) {
330: was = url + was;
331: } else {
332: was = url + "/" + was;
333: }
334: }
335:
336: Log.out("-> " + was);
337:
338: return was;
339: }
340:
341: public Insets getInsets() {
342: return new Insets(5, 5, 5, 5);
343: }
344: }
345:
346: class Holer {
347: private String localDir = null;
348:
349: public Holer(String localDir) {
350: this .localDir = localDir;
351: }
352:
353: public String holZeug(String wat) {
354: try {
355: String dealer = wat.substring(0, wat.indexOf("/"));
356: String wo = wat.substring(wat.indexOf("/"));
357: String zeug = "";
358:
359: Log.out(">> " + dealer + wo);
360:
361: Socket deal = new Socket(dealer, 80);
362: deal.setSoTimeout(5000);
363:
364: BufferedWriter order = new BufferedWriter(
365: new OutputStreamWriter(deal.getOutputStream()));
366: BufferedReader checkung = new BufferedReader(
367: new InputStreamReader(deal.getInputStream()));
368:
369: order.write("GET http://" + wat + " HTTP/1.0\n\n");
370: order.flush();
371:
372: int len = 0;
373:
374: while (!checkung.ready() && (len < 5000)) {
375: chill(100);
376: len += 100;
377: }
378:
379: while (checkung.ready()) {
380: zeug = zeug + checkung.readLine();
381: }
382:
383: order.close();
384: checkung.close();
385:
386: return zeug;
387: } catch (Exception ex) {
388: ex.printStackTrace();
389: }
390:
391: return "";
392: }
393:
394: public void bringAnStart(String wat, boolean force) {
395: try {
396: String dealer = wat.substring(0, wat.indexOf("/"));
397: String wo = wat.substring(wat.indexOf("/"));
398: String zeug = "";
399:
400: Log.debug(">>> " + dealer + wo);
401:
402: //JFtp.statusP.jftp.ensureLogging();
403: File d = new File(localDir);
404: d.mkdir();
405:
406: File f = new File(localDir
407: + wo.substring(wo.lastIndexOf("/") + 1));
408:
409: if (f.exists() && !force) {
410: Log.debug(">>> file already exists...");
411:
412: return;
413: } else {
414: f.delete();
415: }
416:
417: Socket deal = new Socket(dealer, 80);
418: BufferedWriter order = new BufferedWriter(
419: new OutputStreamWriter(deal.getOutputStream()));
420: DataInputStream checkung = new DataInputStream(
421: new BufferedInputStream(deal.getInputStream()));
422:
423: BufferedOutputStream vorrat = new BufferedOutputStream(
424: new FileOutputStream(localDir
425: + wo.substring(wo.lastIndexOf("/") + 1)));
426:
427: byte[] alu = new byte[2048];
428:
429: order.write("GET http://" + wat + " HTTP/1.0\n\n");
430: order.flush();
431:
432: boolean line = true;
433: boolean bin = false;
434:
435: while (true) {
436: chill(10);
437:
438: String tmp = "";
439:
440: while (line) {
441: String x = checkung.readLine();
442:
443: if (x == null) {
444: break;
445: }
446:
447: tmp += (x + "\n");
448:
449: if (x.equals("")) {
450: line = false;
451: }
452: }
453:
454: int x = checkung.read(alu);
455:
456: if (x == -1) {
457: if (line) {
458: vorrat.write(tmp.getBytes(), 0, tmp.length());
459: }
460:
461: order.close();
462: checkung.close();
463: vorrat.flush();
464: vorrat.close();
465:
466: return;
467: } else {
468: vorrat.write(alu, 0, x);
469: }
470: }
471: } catch (Exception ex) {
472: ex.printStackTrace();
473: }
474: }
475:
476: private static void chill(int time) {
477: try {
478: Thread.sleep(time);
479: } catch (Exception ex) {
480: }
481: }
482: }
|