001: package com.quadcap.http.client;
002:
003: /* Copyright 1998 - 2003 Quadcap Software. All rights reserved.
004: *
005: * This software is distributed under the Quadcap Free Software License.
006: * This software may be used or modified for any purpose, personal or
007: * commercial. Open Source redistributions are permitted. Commercial
008: * redistribution of larger works derived from, or works which bundle
009: * this software requires a "Commercial Redistribution License"; see
010: * http://www.quadcap.com/purchase.
011: *
012: * Redistributions qualify as "Open Source" under one of the following terms:
013: *
014: * Redistributions are made at no charge beyond the reasonable cost of
015: * materials and delivery.
016: *
017: * Redistributions are accompanied by a copy of the Source Code or by an
018: * irrevocable offer to provide a copy of the Source Code for up to three
019: * years at the cost of materials and delivery. Such redistributions
020: * must allow further use, modification, and redistribution of the Source
021: * Code under substantially the same terms as this license.
022: *
023: * Redistributions of source code must retain the copyright notices as they
024: * appear in each source code file, these license terms, and the
025: * disclaimer/limitation of liability set forth as paragraph 6 below.
026: *
027: * Redistributions in binary form must reproduce this Copyright Notice,
028: * these license terms, and the disclaimer/limitation of liability set
029: * forth as paragraph 6 below, in the documentation and/or other materials
030: * provided with the distribution.
031: *
032: * The Software is provided on an "AS IS" basis. No warranty is
033: * provided that the Software is free of defects, or fit for a
034: * particular purpose.
035: *
036: * Limitation of Liability. Quadcap Software shall not be liable
037: * for any damages suffered by the Licensee or any third party resulting
038: * from use of the Software.
039: */
040:
041: import java.io.*;
042: import java.util.*;
043:
044: import java.net.Socket;
045: import java.net.URL;
046: import java.net.URLEncoder;
047: import java.net.URLConnection;
048:
049: import org.xml.sax.InputSource;
050:
051: import com.quadcap.http.util.HeaderParser;
052:
053: import com.quadcap.util.collections.ArrayQueue;
054:
055: import com.quadcap.util.text.OctetMap;
056: import com.quadcap.util.text.Scanner;
057:
058: import com.quadcap.util.Debug;
059: import com.quadcap.util.Util;
060:
061: import com.quadcap.io.IO;
062: import com.quadcap.io.LimitedInputStream;
063: import com.quadcap.io.NullOutputStream;
064:
065: public class HttpFetcher {
066: static boolean checkLinks = false;
067: static boolean showResponseHeaders = false;
068:
069: static byte[] delims = { 0x0d, 0x0a, 0x0d, 0x0a };
070:
071: public static byte[] fetch(String url) throws Exception {
072: return fetch(url, new ArrayList());
073: }
074:
075: public static byte[] fetch(String url, List headers)
076: throws Exception {
077: InputStream is = fetchStream(url, headers);
078: byte[] doc = readStream(is);
079: is.close();
080: return doc;
081: }
082:
083: public static byte[] post(String url, String fileName, List headers)
084: throws Exception {
085: InputStream is = postStream(url, fileName, headers);
086: byte[] doc = readStream(is);
087: is.close();
088: return doc;
089:
090: }
091:
092: public static InputStream postStream(String url, String fileName,
093: List headers) throws Exception {
094: if (url.indexOf("http://") != 0) {
095: System.err.println("Bad url (protocol): " + url);
096: return null;
097: }
098: url = url.substring(7);
099: int idx = url.indexOf('/');
100: if (idx <= 0) {
101: url = url + "/";
102: idx = url.indexOf('/');
103: }
104: String host = url.substring(0, idx);
105: String name = url.substring(idx);
106: int port = 80;
107: idx = host.indexOf(':');
108: if (idx >= 0) {
109: port = Integer.parseInt(host.substring(idx + 1));
110: host = host.substring(0, idx);
111: }
112: Socket s = new Socket(host, port);
113:
114: headers.add("Content-Length: "
115: + String.valueOf(new File(fileName).length()));
116:
117: OutputStream sos = s.getOutputStream();
118: BufferedOutputStream os = new BufferedOutputStream(sos);
119: os.write(("POST " + name + " HTTP/1.0\r\n").getBytes());
120: Iterator iter = headers.iterator();
121: while (iter.hasNext()) {
122: IO.write(os, iter.next().toString());
123: os.write("\r\n".getBytes());
124: }
125: os.write("\r\n".getBytes());
126:
127: FileInputStream fis = new FileInputStream(fileName);
128: IO.copyStream(fis, os);
129: os.flush();
130:
131: InputStream is = s.getInputStream();
132: return new BufferedInputStream(is);
133: }
134:
135: public static InputStream fetchStream(String url) throws Exception {
136: return fetchStream(url, new ArrayList());
137: }
138:
139: public static void ripPlayList(InputStream is, List headers)
140: throws Exception {
141: BufferedReader br = new BufferedReader(
142: new InputStreamReader(is));
143: String line;
144: while ((line = br.readLine()) != null) {
145: String[] v = line.split("=");
146: if (v.length == 2 && v[0].equals("File1")) {
147: is.close();
148: ripStream(v[1], headers);
149: return;
150: }
151: }
152: is.close();
153: }
154:
155: public static void ripStream(String url, List headers)
156: throws Exception {
157: InputStream is = fetchStream(url, headers);
158: StringBuffer sb = new StringBuffer();
159: for (int c = is.read(); c != '\n'; c = is.read()) {
160: sb.append((char) c);
161: }
162: Debug.println("ripStream(" + url + "), Response: " + sb);
163: Map hdrs = HeaderParser.parseHeaders(is);
164: Debug.println("Headers = " + hdrs);
165: if (sb.toString().indexOf("302") > 0) {
166: is.close();
167: Debug.println("redirecting to: " + hdrs.get("location"));
168: ripStream(hdrs.get("location").toString(), headers);
169: return;
170: }
171: String contentType = String.valueOf(hdrs.get("content-type"));
172: if (contentType.equalsIgnoreCase("audio/x-scpls")) {
173: ripPlayList(is, headers);
174: return;
175: }
176: int metaInt = 0;
177: try {
178: metaInt = Integer.parseInt(String.valueOf(hdrs
179: .get("icy-metaint")));
180: } catch (Throwable t) {
181: }
182: byte[] buf = new byte[metaInt];
183: String title = null;
184: String lastTitle = "__INVALID_lastTitle__";
185: Mp3FrameStream out = new Mp3FrameStream();
186: FileOutputStream fout = null;
187: while (true) {
188: int cnt = is.read(buf);
189: while (cnt < buf.length) {
190: if (cnt < 0) {
191: if (out != null) {
192: out.close();
193: }
194: Debug.println("partial buffer, returning... ("
195: + cnt + ")");
196: return;
197: }
198: cnt += is.read(buf, cnt, buf.length - cnt);
199: }
200: title = getTitle(is).replace('/', ' ');
201: if (title.length() > 0) {
202: if (!title.equals(lastTitle)) {
203: Debug.println("Title: " + title);
204: if (fout == null) {
205: // Start a new file
206: fout = new FileOutputStream(title);
207: out.init(fout, new NullOutputStream());
208: out.write(buf);
209: } else {
210: // split the difference
211: out.write(buf, 0, metaInt / 2);
212: out.close();
213: fout = new FileOutputStream(title);
214: out.init(fout, new NullOutputStream());
215: out.write(buf, metaInt / 2, metaInt / 2);
216: }
217: lastTitle = title;
218: }
219: } else {
220: if (fout != null) {
221: out.write(buf);
222: }
223: }
224: }
225: }
226:
227: public static String getTitle(InputStream in) throws IOException {
228: byte[] buf = new byte[in.read() * 16];
229: in.read(buf);
230: StringBuffer sb = new StringBuffer();
231: for (int i = 0; i < buf.length && buf[i] != 0; i++) {
232: sb.append((char) (buf[i]));
233: }
234: String[] p = sb.toString().split(";");
235: for (int i = 0; i < p.length; i++) {
236: String[] v = p[i].trim().split("=");
237: if (v.length == 2 && v[0].equalsIgnoreCase("StreamTitle")) {
238: String s = v[1].substring(1, v[1].length() - 1);
239: while (s.toLowerCase().endsWith(".mp3")) {
240: s = s.substring(0, s.length() - 4);
241: }
242: s += ".mp3";
243: return s;
244: }
245: }
246: return "";
247: }
248:
249: public static InputStream fetchStream(String url, List headers)
250: throws IOException {
251: //Debug.println(0, "Fetch: " + url);
252: if (url.indexOf("http://") != 0) {
253: System.err.println("Bad url (protocol): " + url);
254: return null;
255: }
256: url = url.substring(7);
257: int idx = url.indexOf('/');
258: if (idx <= 0) {
259: url = url + "/";
260: idx = url.length() - 1;
261: }
262: String host = url.substring(0, idx);
263: String name = url.substring(idx);
264: int port = 80;
265: idx = host.indexOf(':');
266: if (idx >= 0) {
267: port = Integer.parseInt(host.substring(idx + 1));
268: host = host.substring(0, idx);
269: }
270: Socket s = new Socket(host, port);
271:
272: ByteArrayOutputStream bos = new ByteArrayOutputStream();
273: Debug.println("GET " + name);
274: bos.write(("GET " + name + " HTTP/1.0\r\n").getBytes());
275: Iterator iter = headers.iterator();
276: while (iter.hasNext()) {
277: String hdr = iter.next().toString();
278: IO.write(bos, hdr);
279: Debug.println(" " + hdr);
280: bos.write('\r');
281: bos.write('\n');
282: }
283: bos.write("\r\n".getBytes());
284:
285: s.getOutputStream().write(bos.toByteArray());
286:
287: InputStream is = s.getInputStream();
288: return new BufferedInputStream(is);
289: }
290:
291: public static InputStream fetch2(String url) throws Exception {
292: System.out.println("Fetch: " + url);
293: URLConnection c = new URL(url).openConnection();
294: c.connect();
295: return c.getInputStream();
296: }
297:
298: public static byte[] readStream(InputStream is) throws IOException {
299: ByteArrayOutputStream bos = new ByteArrayOutputStream();
300:
301: int state = 0;
302: int cnt = 0;
303: if (showResponseHeaders)
304: state = 5;
305: while (state < 4) {
306: int c = is.read();
307: if (c < 0) {
308: throw new IOException(
309: "unexpected eof in message headers");
310: }
311: if (delims[state] == c)
312: state++;
313: else if (delims[0] == c)
314: state = 1;
315: else
316: state = 0;
317: }
318:
319: byte[] buf = new byte[1024];
320: while ((cnt = is.read(buf)) > 0) {
321: bos.write(buf, 0, cnt);
322: }
323: return bos.toByteArray();
324: }
325:
326: public static Hashtable buildTable(String fname) throws Exception {
327: BufferedReader r = new BufferedReader(new FileReader(fname));
328: String turl;
329: Hashtable t = new Hashtable();
330: while ((turl = r.readLine()) != null) {
331: url = turl;
332: byte[] doc = fetch(url);
333: System.err.println(url + ": " + Util.strBytes(doc));
334: t.put(url, doc);
335: }
336: return t;
337: }
338:
339: public static void checkTable(String fname, Hashtable t)
340: throws Exception {
341: BufferedReader r = new BufferedReader(new FileReader(fname));
342: String url;
343: while ((url = r.readLine()) != null) {
344: try {
345: byte[] doc = fetch(url);
346: byte[] exp = (byte[]) t.get(url);
347: if (Util.compareBytes(doc, exp) != 0) {
348: System.err.println("Failed: " + url);
349: System.err.println("Doc: " + Util.strBytes(doc));
350: }
351: } catch (Exception e) {
352: Debug.print(e);
353: }
354: }
355: }
356:
357: public static void addAV(String fname) throws Exception {
358: BufferedReader r = new BufferedReader(new FileReader(fname));
359: String url;
360: Hashtable t = new Hashtable();
361: while ((url = r.readLine()) != null) {
362: System.out.println("url: " + url);
363: StringBuffer sb = new StringBuffer(
364: "http://add-url.altavista.com/cgi-bin/newurl?ad=1&q=");
365: sb.append(URLEncoder.encode(url));
366: byte[] doc = fetch(sb.toString());
367: String s = new String(doc);
368: if (s.indexOf("The page was fetched") < 0) {
369: System.out.println(s);
370: break;
371: }
372: try {
373: Thread.sleep(2000);
374: } catch (Throwable dt) {
375: }
376: }
377: }
378:
379: public static void check(String name) throws Exception {
380: LinkChecker lc = new LinkChecker(name);
381: lc.run();
382: lc.printBadLinks();
383: }
384:
385: public static void main(String args[]) {
386: Debug.debugMode = Debug.debugAll;
387: Debug.debugStream = System.out;
388: try {
389: doit(args);
390: } catch (Exception e) {
391: System.out.println("Exception: " + e.toString());
392: Debug.print(e);
393: }
394: }
395:
396: static String fname = null;
397: static int repeat = 2;
398: static int delay = 0;
399: static Hashtable t;
400: static boolean times = false;
401: static boolean rip = false;
402: static String url = null;
403: static int limit = 0;
404:
405: public static void doit() throws Exception {
406: if (times) {
407: for (int i = 0; i < repeat; i++) {
408: fetch(url);
409: }
410: } else {
411: for (int i = 0; i < repeat; i++) {
412: checkTable(fname, t);
413: if (delay > 0)
414: Thread.sleep(delay);
415: }
416: }
417: }
418:
419: public static void doit(String args[]) throws Exception {
420: int numThreads = 1;
421: boolean av = false;
422: String post = null;
423: List headers = new ArrayList();
424:
425: int ac = 0;
426: while (ac < args.length) {
427: String arg = args[ac].trim();
428: if (arg.charAt(0) != '-')
429: break;
430: ac++;
431: if (arg.equals("-urls")) {
432: fname = args[ac++];
433: } else if (arg.equals("-count")) {
434: repeat = Integer.parseInt(args[ac++]);
435: } else if (arg.equals("-delay")) {
436: delay = Integer.parseInt(args[ac++]);
437: } else if (arg.equals("-threads")) {
438: numThreads = Integer.parseInt(args[ac++]);
439: } else if (arg.equals("-checklinks")) {
440: checkLinks = true;
441: fname = args[ac++];
442: } else if (arg.equals("-headers")) {
443: showResponseHeaders = true;
444: } else if (arg.equals("-post")) {
445: post = args[ac++];
446: } else if (arg.equals("-header")) {
447: String hName = args[ac++];
448: String hVal = args[ac++];
449: headers.add(hName + ": " + hVal);
450: } else if (arg.equals("-altavista")) {
451: av = true;
452: fname = args[ac++];
453: } else if (arg.equals("-times")) {
454: times = true;
455: } else if (arg.equals("-limit")) {
456: limit = Integer.parseInt(args[ac++]);
457: } else if (arg.equals("-rip")) {
458: if (!rip) {
459: headers.add("Host: 192.168.1.8");
460: headers.add("User-Agent: WinampMPEG/2.8");
461: headers.add("Accept: */*");
462: headers.add("Icy-Metadata:1");
463: headers.add("Connection: close");
464: }
465: rip = true;
466: } else {
467: //throw new Exception("??");
468: }
469: }
470:
471: if (av) {
472: addAV(fname);
473: } else if (checkLinks) {
474: check(fname);
475: return;
476: } else if (fname == null && !times) {
477: url = args[ac];
478: if (rip) {
479: ripStream(url, headers);
480: return;
481: }
482: if (post == null) {
483: InputStream in = fetchStream(url, headers);
484: if (limit > 0) {
485: in = new LimitedInputStream(in, limit);
486: }
487: try {
488: IO.copyStream(in, System.out);
489: } finally {
490: in.close();
491: }
492: } else {
493: byte[] doc = post(url, post, headers);
494: System.out.write(doc);
495: }
496: } else {
497: t = buildTable(fname);
498: Thread[] threads = new Thread[numThreads];
499: for (int i = 0; i < numThreads; i++) {
500: threads[i] = new Thread() {
501: public void run() {
502: try {
503: doit();
504: } catch (Throwable t) {
505: Debug.print(t);
506: }
507: }
508: };
509: }
510: long start = System.currentTimeMillis();
511: for (int i = 0; i < numThreads; i++) {
512: threads[i].start();
513: }
514: for (int i = 0; i < numThreads; i++) {
515: try {
516: threads[i].join();
517: } catch (Throwable t) {
518: Debug.print(t);
519: }
520: }
521: long stop = System.currentTimeMillis();
522: long elap = stop - start;
523: int r_s = (int) ((repeat * numThreads * 1000) / elap);
524: if (times) {
525: System.out.println("" + elap + " elapsed");
526: System.out.println("" + r_s + " requests/second");
527: }
528: }
529: }
530: }
|