01: /* $Id: Handler.java 4566 2006-08-31 16:51:41Z stack-sf $
02: *
03: * Created August 11th, 2006
04: *
05: * Copyright (C) 2006 Internet Archive.
06: *
07: * This file is part of the Heritrix web crawler (crawler.archive.org).
08: *
09: * Heritrix is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU Lesser Public License as published by
11: * the Free Software Foundation; either version 2.1 of the License, or
12: * any later version.
13: *
14: * Heritrix is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17: * GNU Lesser Public License for more details.
18: *
19: * You should have received a copy of the GNU Lesser Public License
20: * along with Heritrix; if not, write to the Free Software
21: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22: */
23: package org.archive.net.md5;
24:
25: import java.io.IOException;
26: import java.io.InputStream;
27: import java.net.URL;
28: import java.net.URLConnection;
29: import java.net.URLStreamHandler;
30:
31: /**
32: * A protocol handler for an 'md5' URI scheme.
33: * Md5 URLs look like this: <code>md5:deadbeefdeadbeefdeadbeefdeadbeef</code>
34: * When this handler is invoked against an md5 URL, it passes the raw md5 to
35: * the configured script as an argument. The configured script then does the
36: * work to bring the item pointed to by the md5 local so we can open a Stream
37: * on the local copy. Local file is deleted when we finish. Do
38: * {@link org.archive.net.DownloadURLConnection#getFile()} to get name of
39: * temporary file.
40: *
41: * <p>You need to define the system property
42: * <code>-Djava.protocol.handler.pkgs=org.archive.net</code> to add this handler
43: * to the java.net.URL set. Also define system properties
44: * <code>-Dorg.archive.net.md5.Md5URLConnection.path=PATH_TO_SCRIPT</code> to
45: * pass path of script to run as well as
46: * <code>-Dorg.archive.net.md5.Md5URLConnection.options=OPTIONS</code> for
47: * any options you'd like to include. The pointed-to PATH_TO_SCRIPT
48: * will be invoked as follows: <code>PATH_TO_SCRIPT OPTIONS MD5
49: * LOCAL_TMP_FILE</code>. The LOCAL_TMP_FILE file is made in
50: * <code>java.io.tmpdir</code> using java tmp name code.
51: * @author stack
52: */
53: public class Handler extends URLStreamHandler {
54: protected URLConnection openConnection(URL u) {
55: return new Md5URLConnection(u);
56: }
57:
58: /**
59: * Main dumps rsync file to STDOUT.
60: * @param args
61: * @throws IOException
62: */
63: public static void main(String[] args) throws IOException {
64: if (args.length != 1) {
65: System.out.println("Usage: java java "
66: + "-Djava.protocol.handler.pkgs=org.archive.net "
67: + "org.archive.net.md5.Handler "
68: + "md5:deadbeefdeadbeefdeadbeefdeadbeef");
69: System.exit(1);
70: }
71: System.setProperty("org.archive.net.md5.Md5URLConnection.path",
72: "/tmp/manifest");
73: System.setProperty("java.protocol.handler.pkgs",
74: "org.archive.net");
75: URL u = new URL(args[0]);
76: URLConnection connect = u.openConnection();
77: // Write download to stdout.
78: final int bufferlength = 4096;
79: byte[] buffer = new byte[bufferlength];
80: InputStream is = connect.getInputStream();
81: try {
82: for (int count = is.read(buffer, 0, bufferlength); (count = is
83: .read(buffer, 0, bufferlength)) != -1;) {
84: System.out.write(buffer, 0, count);
85: }
86: System.out.flush();
87: } finally {
88: is.close();
89: }
90: }
91: }
|