001: /*
002:
003: ============================================================================
004: The Apache Software License, Version 1.1
005: ============================================================================
006:
007: Copyright (C) 1999-2003 The Apache Software Foundation. All rights reserved.
008:
009: Redistribution and use in source and binary forms, with or without modifica-
010: tion, are permitted provided that the following conditions are met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. Redistributions in binary form must reproduce the above copyright notice,
016: this list of conditions and the following disclaimer in the documentation
017: and/or other materials provided with the distribution.
018:
019: 3. The end-user documentation included with the redistribution, if any, must
020: include the following acknowledgment: "This product includes software
021: developed by the Apache Software Foundation (http://www.apache.org/)."
022: Alternately, this acknowledgment may appear in the software itself, if
023: and wherever such third-party acknowledgments normally appear.
024:
025: 4. The names "Batik" and "Apache Software Foundation" must not be
026: used to endorse or promote products derived from this software without
027: prior written permission. For written permission, please contact
028: apache@apache.org.
029:
030: 5. Products derived from this software may not be called "Apache", nor may
031: "Apache" appear in their name, without prior written permission of the
032: Apache Software Foundation.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
035: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
036: FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
037: APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
038: INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU-
039: DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
040: OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
041: ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044:
045: This software consists of voluntary contributions made by many individuals
046: on behalf of the Apache Software Foundation. For more information on the
047: Apache Software Foundation, please see <http://www.apache.org/>.
048:
049: */
050:
051: package org.apache.batik.util;
052:
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.DataFormatException;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException;
066:
067: /**
068: * Holds the data for more URL's
069: *
070: * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
071: * @version $Id$
072: */
073: public class ParsedURLData {
074:
075: String HTTP_USER_AGENT_HEADER = "User-Agent";
076:
077: String HTTP_ACCEPT_HEADER = "Accept";
078: String HTTP_ACCEPT_LANGUAGE_HEADER = "Accept-Language";
079: String HTTP_ACCEPT_ENCODING_HEADER = "Accept-Encoding";
080:
081: protected static List acceptedEncodings = new LinkedList();
082: static {
083: acceptedEncodings.add("gzip");
084: }
085:
086: /**
087: * GZIP header magic number bytes, like found in a gzipped
088: * files, which are encoded in Intel format (ie. little indian).
089: */
090: public final static byte GZIP_MAGIC[] = { (byte) 0x1f, (byte) 0x8b };
091:
092: /**
093: * This is a utility function others can call that checks if
094: * is is a GZIP stream if so it returns a GZIPInputStream that
095: * will decode the contents, otherwise it returns (or a
096: * buffered version of is) untouched.
097: * @param is Stream that may potentially be a GZIP stream.
098: */
099: public static InputStream checkGZIP(InputStream is)
100: throws IOException {
101:
102: if (!is.markSupported())
103: is = new BufferedInputStream(is);
104: byte data[] = new byte[2];
105: try {
106: is.mark(2);
107: is.read(data);
108: is.reset();
109: } catch (Exception ex) {
110: is.reset();
111: return is;
112: }
113: if ((data[0] == GZIP_MAGIC[0]) && (data[1] == GZIP_MAGIC[1]))
114: return new GZIPInputStream(is);
115:
116: if (((data[0] & 0x0F) == 8) && ((data[0] >>> 4) <= 7)) {
117: // Check for a zlib (deflate) stream
118: int chk = ((((int) data[0]) & 0xFF) * 256 + (((int) data[1]) & 0xFF));
119: if ((chk % 31) == 0) {
120: try {
121: // I'm not really as certain of this check
122: // as I would like so I want to force it
123: // to decode part of the stream.
124: is.mark(100);
125: InputStream ret = new InflaterInputStream(is);
126: if (!ret.markSupported())
127: ret = new BufferedInputStream(ret);
128: ret.mark(2);
129: ret.read(data);
130: is.reset();
131: ret = new InflaterInputStream(is);
132: return ret;
133: } catch (ZipException ze) {
134: is.reset();
135: return is;
136: }
137: }
138: }
139:
140: return is;
141: }
142:
143: /**
144: * Since the Data instance is 'hidden' in the ParsedURL
145: * instance we make all our methods public. This makes it
146: * easy for the various Protocol Handlers to update an
147: * instance as parsing proceeds.
148: */
149: public String protocol = null;
150: public String host = null;
151: public int port = -1;
152: public String path = null;
153: public String ref = null;
154: public String contentType = null;
155: public String contentEncoding = null;
156:
157: public InputStream stream = null;
158: public boolean hasBeenOpened = false;
159:
160: /**
161: * Void constructor
162: */
163: public ParsedURLData() {
164: }
165:
166: /**
167: * Build from an existing URL.
168: */
169: public ParsedURLData(URL url) {
170: protocol = url.getProtocol();
171: if ((protocol != null) && (protocol.length() == 0))
172: protocol = null;
173:
174: host = url.getHost();
175: if ((host != null) && (host.length() == 0))
176: host = null;
177:
178: port = url.getPort();
179:
180: path = url.getFile();
181: if ((path != null) && (path.length() == 0))
182: path = null;
183:
184: ref = url.getRef();
185: if ((ref != null) && (ref.length() == 0))
186: ref = null;
187: }
188:
189: /**
190: * Attempts to build a normal java.net.URL instance from this
191: * URL.
192: */
193: protected URL buildURL() throws MalformedURLException {
194:
195: // System.out.println("File: " + file);
196: // if (ref != null)
197: // file += "#" + ref;
198: // System.err.println("Building: " + protocol + " - " +
199: // host + " - " + path);
200:
201: if ((protocol != null) && (host != null)) {
202: String file = "";
203: if (path != null)
204: file = path;
205: if (port == -1)
206: return new URL(protocol, host, file);
207:
208: return new URL(protocol, host, port, file);
209: }
210:
211: // System.err.println("toString: " + toString());
212: return new URL(toString());
213: }
214:
215: /**
216: * Implement Object.hashCode.
217: */
218: public int hashCode() {
219: int hc = port;
220: if (protocol != null)
221: hc ^= protocol.hashCode();
222: if (host != null)
223: hc ^= host.hashCode();
224:
225: // For some URLS path and ref can get fairly long
226: // and the most unique part is towards the end
227: // so we grab that part for HC purposes
228: if (path != null) {
229: int len = path.length();
230: if (len > 20)
231: hc ^= path.substring(len - 20).hashCode();
232: else
233: hc ^= path.hashCode();
234: }
235: if (ref != null) {
236: int len = ref.length();
237: if (len > 20)
238: hc ^= ref.substring(len - 20).hashCode();
239: else
240: hc ^= ref.hashCode();
241: }
242:
243: return hc;
244: }
245:
246: /**
247: * Implement Object.equals for ParsedURLData.
248: */
249: public boolean equals(Object obj) {
250: if (obj == null)
251: return false;
252: if (!(obj instanceof ParsedURLData))
253: return false;
254:
255: ParsedURLData ud = (ParsedURLData) obj;
256: if (ud.port != port)
257: return false;
258:
259: if (ud.protocol == null) {
260: if (protocol != null)
261: return false;
262: } else if (protocol == null)
263: return false;
264: else if (!ud.protocol.equals(protocol))
265: return false;
266:
267: if (ud.host == null) {
268: if (host != null)
269: return false;
270: } else if (host == null)
271: return false;
272: else if (!ud.host.equals(host))
273: return false;
274:
275: if (ud.ref == null) {
276: if (ref != null)
277: return false;
278: } else if (ref == null)
279: return false;
280: else if (!ud.ref.equals(ref))
281: return false;
282:
283: if (ud.path == null) {
284: if (path != null)
285: return false;
286: } else if (path == null)
287: return false;
288: else if (!ud.path.equals(path))
289: return false;
290:
291: return true;
292: }
293:
294: /**
295: * Returns the content type if available. This is only available
296: * for some protocols.
297: */
298: public String getContentType(String userAgent) {
299: if (contentType != null)
300: return contentType;
301:
302: if (!hasBeenOpened) {
303: try {
304: openStreamInternal(userAgent, null, null);
305: } catch (IOException ioe) { /* nothing */
306: }
307: }
308:
309: return contentType;
310: }
311:
312: /**
313: * Returns the content encoding if available. This is only available
314: * for some protocols.
315: */
316: public String getContentEncoding(String userAgent) {
317: if (contentEncoding != null)
318: return contentEncoding;
319:
320: if (!hasBeenOpened) {
321: try {
322: openStreamInternal(userAgent, null, null);
323: } catch (IOException ioe) { /* nothing */
324: }
325: }
326:
327: return contentEncoding;
328: }
329:
330: /**
331: * Returns true if the URL looks well formed and complete.
332: * This does not garuntee that the stream can be opened but
333: * is a good indication that things aren't totally messed up.
334: */
335: public boolean complete() {
336: try {
337: buildURL();
338: } catch (MalformedURLException mue) {
339: return false;
340: }
341: return true;
342: }
343:
344: /**
345: * Open the stream and check for common compression types. If
346: * the stream is found to be compressed with a standard
347: * compression type it is automatically decompressed.
348: * @param userAgent The user agent opening the stream (may be null).
349: * @param mimeTypes The expected mime types of the content
350: * in the returned InputStream (mapped to Http accept
351: * header among other possability). The elements of
352: * the iterator must be strings (may be null)
353: */
354: public InputStream openStream(String userAgent, Iterator mimeTypes)
355: throws IOException {
356: InputStream raw = openStreamInternal(userAgent, mimeTypes,
357: acceptedEncodings.iterator());
358: if (raw == null)
359: return null;
360: stream = null;
361:
362: return checkGZIP(raw);
363: }
364:
365: /**
366: * Open the stream and returns it. No checks are made to see
367: * if the stream is compressed or encoded in any way.
368: * @param userAgent The user agent opening the stream (may be null).
369: * @param mimeTypes The expected mime types of the content
370: * in the returned InputStream (mapped to Http accept
371: * header among other possability). The elements of
372: * the iterator must be strings (may be null)
373: */
374: public InputStream openStreamRaw(String userAgent,
375: Iterator mimeTypes) throws IOException {
376:
377: InputStream ret = openStreamInternal(userAgent, mimeTypes, null);
378: stream = null;
379: return ret;
380: }
381:
382: protected InputStream openStreamInternal(String userAgent,
383: Iterator mimeTypes, Iterator encodingTypes)
384: throws IOException {
385: if (stream != null)
386: return stream;
387:
388: hasBeenOpened = true;
389:
390: URL url = null;
391: try {
392: url = buildURL();
393: } catch (MalformedURLException mue) {
394: throw new IOException(
395: "Unable to make sense of URL for connection");
396: }
397:
398: if (url == null)
399: return null;
400:
401: URLConnection urlC = url.openConnection();
402: if (urlC instanceof HttpURLConnection) {
403: if (userAgent != null)
404: urlC.setRequestProperty(HTTP_USER_AGENT_HEADER,
405: userAgent);
406:
407: if (mimeTypes != null) {
408: String acceptHeader = "";
409: while (mimeTypes.hasNext()) {
410: acceptHeader += mimeTypes.next();
411: if (mimeTypes.hasNext())
412: acceptHeader += ",";
413: }
414: urlC.setRequestProperty(HTTP_ACCEPT_HEADER,
415: acceptHeader);
416: }
417:
418: if (encodingTypes != null) {
419: String encodingHeader = "";
420: while (encodingTypes.hasNext()) {
421: encodingHeader += encodingTypes.next();
422: if (encodingTypes.hasNext())
423: encodingHeader += ",";
424: }
425: urlC.setRequestProperty(HTTP_ACCEPT_ENCODING_HEADER,
426: encodingHeader);
427: }
428:
429: contentType = urlC.getContentType();
430: contentEncoding = urlC.getContentEncoding();
431: }
432:
433: return (stream = urlC.getInputStream());
434: }
435:
436: /**
437: * Returns the URL up to and include the port number on
438: * the host. Does not include the path or fragment pieces.
439: */
440: public String getPortStr() {
441: String portStr = "";
442: if (protocol != null)
443: portStr += protocol + ":";
444:
445: if ((host != null) || (port != -1)) {
446: portStr += "//";
447: if (host != null)
448: portStr += host;
449: if (port != -1)
450: portStr += ":" + port;
451: }
452:
453: return portStr;
454: }
455:
456: protected boolean sameFile(ParsedURLData other) {
457: if (this == other)
458: return true;
459:
460: // Check if the rest of the two PURLs matche other than
461: // the 'ref'
462: if ((port == other.port)
463: && ((path == other.path) || ((path != null) && path
464: .equals(other.path)))
465: && ((host == other.host) || ((host != null) && host
466: .equals(other.host)))
467: && ((protocol == other.protocol) || ((protocol != null) && protocol
468: .equals(other.protocol))))
469: return true;
470:
471: return false;
472: }
473:
474: /**
475: * Return a string representation of the data.
476: */
477: public String toString() {
478: String ret = getPortStr();
479: if (path != null)
480: ret += path;
481:
482: if (ref != null)
483: ret += "#" + ref;
484:
485: return ret;
486: }
487: }
|