001: /* $Id: Handler.java 4902 2007-02-16 00:07:40Z stack-sf $
002: *
003: * Created October 28th, 2006
004: *
005: * Copyright (C) 2006 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.net.s3;
024:
025: import java.io.IOException;
026: import java.io.InputStream;
027: import java.io.PrintStream;
028: import java.net.HttpURLConnection;
029: import java.net.URL;
030: import java.net.URLConnection;
031: import java.net.URLStreamHandler;
032: import java.text.SimpleDateFormat;
033: import java.util.Date;
034: import java.util.Locale;
035: import java.util.TimeZone;
036:
037: import org.jets3t.service.S3ServiceException;
038: import org.jets3t.service.impl.rest.httpclient.RestS3Service;
039: import org.jets3t.service.model.S3Bucket;
040: import org.jets3t.service.model.S3Object;
041: import org.jets3t.service.security.AWSCredentials;
042:
043: /**
044: * A protocol handler for an s3 scheme. Takes URLs of the form:
045: * <code>s3://aws.access.key.id:aws.access.key.secret@BUCKET/PATH</code> (Same
046: * as in hadoop).
047: *
048: * @author stack
049: */
050: public class Handler extends URLStreamHandler {
051: protected URLConnection openConnection(URL u) throws IOException {
052: // This looking for accessKey id and accessKey secret code is based
053: // on code from hadoop S3.
054: String accessKey = null;
055: String secretAccessKey = null;
056: String userInfo = u.getUserInfo();
057: if (userInfo != null) {
058: int index = userInfo.indexOf(':');
059: if (index != -1) {
060: accessKey = userInfo.substring(0, index);
061: secretAccessKey = userInfo.substring(index + 1);
062: } else {
063: accessKey = userInfo;
064: }
065: }
066: if (accessKey == null) {
067: accessKey = System.getProperty("aws.access.key.id");
068: }
069: if (secretAccessKey == null) {
070: secretAccessKey = System
071: .getProperty("aws.access.key.secret");
072: }
073: if (accessKey == null && secretAccessKey == null) {
074: throw new IllegalArgumentException(
075: "AWS "
076: + "Access Key ID and Secret Access Key "
077: + "must be specified as the username "
078: + "or password (respectively) of a s3 URL, "
079: + "or by setting the "
080: + "aws.access.key.id or "
081: + "aws.access.key.secret properties (respectively).");
082: } else if (accessKey == null) {
083: throw new IllegalArgumentException("AWS "
084: + "Access Key ID must be specified "
085: + "as the username of a s3 URL, or by setting the "
086: + "aws.access.key.id property.");
087: } else if (secretAccessKey == null) {
088: throw new IllegalArgumentException("AWS "
089: + "Secret Access Key must be specified "
090: + "as the password of a s3 URL, or by setting the "
091: + "aws.access.key.secret property.");
092: }
093:
094: RestS3Service s3Service;
095: try {
096: s3Service = new RestS3Service(new AWSCredentials(accessKey,
097: secretAccessKey));
098: } catch (S3ServiceException e) {
099: e.printStackTrace();
100: throw new IOException(e.toString());
101: }
102: InputStream is = null;
103: try {
104: // This opens the stream to the bucket/key object.
105: S3Object s3obj = s3Service
106: .getObject(new S3Bucket(u.getHost()), u.getPath()
107: .substring(1) /* Skip starting '/' character */);
108: is = s3obj.getDataInputStream();
109: } catch (S3ServiceException e) {
110: e.printStackTrace();
111: throw new IOException(e.toString());
112: }
113:
114: final InputStream inputStream = is;
115: return new URLConnection(u) {
116: private InputStream is = inputStream;
117:
118: @Override
119: public InputStream getInputStream() throws IOException {
120: return this .is;
121: }
122:
123: @Override
124: public void connect() throws IOException {
125: // Nothing to do. When we give back this object, we're
126: // connected.
127: }
128: };
129: }
130:
131: /**
132: * Main dumps rsync file to STDOUT.
133: * @param args
134: * @throws IOException
135: */
136: public static void main(String[] args) throws IOException {
137: if (args.length != 1) {
138: System.out
139: .println("Usage: java "
140: + "org.archive.net.s3.Handler "
141: + "s3://AWS_ACCESS_KEY_ID:AWS_ACCESS_KEY_SECRET@BUCKET/KEY");
142: System.exit(1);
143: }
144: URL u = new URL(args[0]);
145: URLConnection connect = u.openConnection();
146: // Write download to stdout.
147: final int bufferlength = 4096;
148: byte[] buffer = new byte[bufferlength];
149: InputStream is = connect.getInputStream();
150: try {
151: for (int count = -1; (count = is.read(buffer, 0,
152: bufferlength)) != -1;) {
153: System.out.write(buffer, 0, count);
154: }
155: System.out.flush();
156: } finally {
157: is.close();
158: }
159: }
160: }
|