001: /*
002:
003: ============================================================================
004: The Apache Software License, Version 1.1
005: ============================================================================
006:
007: Copyright (C) 1999-2003 The Apache Software Foundation. All rights reserved.
008:
009: Redistribution and use in source and binary forms, with or without modifica-
010: tion, are permitted provided that the following conditions are met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. Redistributions in binary form must reproduce the above copyright notice,
016: this list of conditions and the following disclaimer in the documentation
017: and/or other materials provided with the distribution.
018:
019: 3. The end-user documentation included with the redistribution, if any, must
020: include the following acknowledgment: "This product includes software
021: developed by the Apache Software Foundation (http://www.apache.org/)."
022: Alternately, this acknowledgment may appear in the software itself, if
023: and wherever such third-party acknowledgments normally appear.
024:
025: 4. The names "Batik" and "Apache Software Foundation" must not be
026: used to endorse or promote products derived from this software without
027: prior written permission. For written permission, please contact
028: apache@apache.org.
029:
030: 5. Products derived from this software may not be called "Apache", nor may
031: "Apache" appear in their name, without prior written permission of the
032: Apache Software Foundation.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
035: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
036: FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
037: APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
038: INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU-
039: DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
040: OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
041: ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044:
045: This software consists of voluntary contributions made by many individuals
046: on behalf of the Apache Software Foundation. For more information on the
047: Apache Software Foundation, please see <http://www.apache.org/>.
048:
049: */
050:
051: package org.apache.batik.util;
052:
053: import java.io.IOException;
054: import java.io.InputStream;
055: import java.net.URL;
056: import java.util.ArrayList;
057: import java.util.HashMap;
058: import java.util.Iterator;
059: import java.util.List;
060: import java.util.Map;
061:
062: /**
063: * This class is used as a replacement for java.net.URL. This is done
064: * for several reasons. First unlike java.net.URL this class will
065: * accept and parse as much of a URL as possible, without throwing a
066: * MalformedURL exception. This makes it extreamly useful for simply
067: * parsing a URL string (hence it's name).
068: *
069: * Second it allows for extension of the protocols supported by the
070: * URL parser. Batik uses this to support the 'Data' protocol.
071: *
072: * Third by default it checks the streams that it opens to see if they
073: * are GZIP compressed, if so it automatically uncompresses them
074: * (avoiding opening the stream twice in the processes).
075: *
076: * It is worth noting that most real work is defered to the
077: * ParsedURLData class to which most methods are forwarded. This is
078: * done because it allows a constructor interface to ParsedURL (mostly
079: * for compatability with core URL), in spite of the fact that the
080: * real implemenation uses the protocol handlers as factories for
081: * protocol specific instances of the ParsedURLData class.
082: *
083: * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
084: * @version $Id$
085: */
086: public class ParsedURL {
087:
088: /**
089: * The data class we defer most things to.
090: */
091: ParsedURLData data;
092:
093: /**
094: * The user agent to associate with this URL
095: */
096: String userAgent;
097:
098: /**
099: * This maps between protocol names and ParsedURLProtocolHandler instances.
100: */
101: private static Map handlersMap = null;
102:
103: /**
104: * The default protocol handler. This handler is used when
105: * other handlers fail or no match for a protocol can be
106: * found.
107: */
108: private static ParsedURLProtocolHandler defaultHandler = new ParsedURLDefaultProtocolHandler();
109:
110: private static String globalUserAgent = "Batik/1.0";
111:
112: public static String getGlobalUserAgent() {
113: return globalUserAgent;
114: }
115:
116: public static void setGlobalUserAgent(String userAgent) {
117: globalUserAgent = userAgent;
118: }
119:
120: /**
121: * Returns the shared instance of HandlersMap. This method is
122: * also responsible for initializing the handler map if this is
123: * the first time it has been requested since the class was
124: * loaded.
125: */
126: private static synchronized Map getHandlersMap() {
127: if (handlersMap != null)
128: return handlersMap;
129:
130: handlersMap = new HashMap();
131: registerHandler(new ParsedURLDataProtocolHandler());
132: registerHandler(new ParsedURLJarProtocolHandler());
133:
134: Iterator iter = Service
135: .providers(ParsedURLProtocolHandler.class);
136: while (iter.hasNext()) {
137: ParsedURLProtocolHandler handler;
138: handler = (ParsedURLProtocolHandler) iter.next();
139:
140: // System.out.println("Handler: " + handler);
141: registerHandler(handler);
142: }
143:
144: return handlersMap;
145:
146: }
147:
148: /**
149: * Returns the handler for a particular protocol. If protocol is
150: * <tt>null</tt> or no match is found in the handlers map it
151: * returns the default protocol handler.
152: * @param protocol The protocol to get a handler for.
153: */
154: public static synchronized ParsedURLProtocolHandler getHandler(
155: String protocol) {
156: if (protocol == null)
157: return defaultHandler;
158:
159: Map handlers = getHandlersMap();
160: ParsedURLProtocolHandler ret;
161: ret = (ParsedURLProtocolHandler) handlers.get(protocol);
162: if (ret == null)
163: ret = defaultHandler;
164: return ret;
165: }
166:
167: /**
168: * Registers a Protocol handler by adding it to the handlers map.
169: * If the given protocol handler returns <tt>null</tt> as it's
170: * supported protocol then it is registered as the default
171: * protocol handler.
172: * @param handler the new Protocol Handler to register
173: */
174: public static synchronized void registerHandler(
175: ParsedURLProtocolHandler handler) {
176: if (handler.getProtocolHandled() == null) {
177: defaultHandler = handler;
178: return;
179: }
180:
181: Map handlers = getHandlersMap();
182: handlers.put(handler.getProtocolHandled(), handler);
183: }
184:
185: /**
186: * This is a utility function others can call that checks if
187: * is is a GZIP stream if so it returns a GZIPInputStream that
188: * will decode the contents, otherwise it returns (or a
189: * buffered version of is) untouched.
190: * @param is Stream that may potentially be a GZIP stream.
191: */
192: public static InputStream checkGZIP(InputStream is)
193: throws IOException {
194: return ParsedURLData.checkGZIP(is);
195: }
196:
197: /**
198: * Construct a ParsedURL from the given url string.
199: * @param urlStr The string to try and parse as a URL
200: */
201: public ParsedURL(String urlStr) {
202: this .userAgent = getGlobalUserAgent();
203: this .data = parseURL(urlStr);
204: }
205:
206: /**
207: * Construct a ParsedURL from the given java.net.URL instance.
208: * This is useful if you already have a valid java.net.URL
209: * instance. This bypasses most of the parsing and hence is
210: * quicker and less prone to reinterpretation than converting the
211: * URL to a string before construction.
212: *
213: * @param url The URL to "mimic".
214: */
215: public ParsedURL(URL url) {
216: this .userAgent = getGlobalUserAgent();
217: this .data = new ParsedURLData(url);
218: }
219:
220: /**
221: * Construct a sub URL from two strings.
222: * @param baseStr The 'parent' URL. Should be complete.
223: * @param urlStr The 'sub' URL may be complete or partial.
224: * the missing pieces will be taken from the baseStr.
225: */
226: public ParsedURL(String baseStr, String urlStr) {
227: this .userAgent = getGlobalUserAgent();
228: if (baseStr != null)
229: this .data = parseURL(baseStr, urlStr);
230: else
231: this .data = parseURL(urlStr);
232: }
233:
234: /**
235: * Construct a sub URL from a base URL and a string for the sub url.
236: * @param baseURL The 'parent' URL.
237: * @param urlStr The 'sub' URL may be complete or partial.
238: * the missing pieces will be taken from the baseURL.
239: */
240: public ParsedURL(URL baseURL, String urlStr) {
241: this .userAgent = getGlobalUserAgent();
242:
243: if (baseURL != null)
244: this .data = parseURL(new ParsedURL(baseURL), urlStr);
245: else
246: this .data = parseURL(urlStr);
247: }
248:
249: /**
250: * Construct a sub URL from a base ParsedURL and a string for the sub url.
251: * @param baseURL The 'parent' URL.
252: * @param urlStr The 'sub' URL may be complete or partial.
253: * the missing pieces will be taken from the baseURL.
254: */
255: public ParsedURL(ParsedURL baseURL, String urlStr) {
256: this .userAgent = baseURL.getUserAgent();
257: if (baseURL != null)
258: this .data = parseURL(baseURL, urlStr);
259: else
260: this .data = parseURL(urlStr);
261: }
262:
263: /**
264: * Return a string rep of the URL (can be passed back into the
265: * constructor if desired).
266: */
267: public String toString() {
268: return data.toString();
269: }
270:
271: /**
272: * Implement Object.equals.
273: * Relies heavily on the contained ParsedURLData's implementation
274: * of equals.
275: */
276: public boolean equals(Object obj) {
277: if (obj == null)
278: return false;
279: if (!(obj instanceof ParsedURL))
280: return false;
281: ParsedURL purl = (ParsedURL) obj;
282: return data.equals(purl.data);
283: }
284:
285: /**
286: * Implement Object.hashCode.
287: * Relies on the contained ParsedURLData's implementation
288: * of hashCode.
289: */
290: public int hashCode() {
291: return data.hashCode();
292: }
293:
294: /**
295: * Returns true if the URL looks well formed and complete.
296: * This does not garuntee that the stream can be opened but
297: * is a good indication that things aren't totally messed up.
298: */
299: public boolean complete() {
300: return data.complete();
301: }
302:
303: /**
304: * Return the user agent current associated with this url (or
305: * null if none).
306: */
307: public String getUserAgent() {
308: return userAgent;
309: }
310:
311: /**
312: * Sets the user agent associated with this url (null clears
313: * any associated user agent).
314: */
315: public void setUserAgent(String userAgent) {
316: this .userAgent = userAgent;
317: }
318:
319: /**
320: * Returns the protocol for this URL.
321: * The protocol is everything upto the first ':'.
322: */
323: public String getProtocol() {
324: if (data.protocol == null)
325: return null;
326: return new String(data.protocol);
327: }
328:
329: /**
330: * Returns the host for this URL, if any, <tt>null</tt> if there isn't
331: * one or it doesn't make sense for the protocol.
332: */
333: public String getHost() {
334: if (data.host == null)
335: return null;
336: return new String(data.host);
337: }
338:
339: /**
340: * Returns the port on the host to connect to, if it was specified
341: * in the url that was parsed, otherwise returns -1.
342: */
343: public int getPort() {
344: return data.port;
345: }
346:
347: /**
348: * Returns the path for this URL, if any (where appropriate for
349: * the protocol this also includes the file, not just directory).
350: * Note that getPath appears in JDK 1.3 as a synonym for getFile
351: * from JDK 1.2.
352: */
353: public String getPath() {
354: if (data.path == null)
355: return null;
356: return new String(data.path);
357: }
358:
359: /**
360: * Returns the 'fragment' reference in the URL.
361: */
362: public String getRef() {
363: if (data.ref == null)
364: return null;
365: return new String(data.ref);
366: }
367:
368: /**
369: * Returns the URL up to and include the port number on
370: * the host. Does not include the path or fragment pieces.
371: */
372: public String getPortStr() {
373: return data.getPortStr();
374: }
375:
376: /**
377: * Returns the content type if available. This is only available
378: * for some protocols.
379: */
380: public String getContentType() {
381: return data.getContentType(userAgent);
382: }
383:
384: /**
385: * Returns the content encoding if available. This is only available
386: * for some protocols.
387: */
388: public String getContentEncoding() {
389: return data.getContentEncoding(userAgent);
390: }
391:
392: /**
393: * Attempt to open the stream checking for common compression
394: * types, and automatically decompressing them if found.
395: */
396: public InputStream openStream() throws IOException {
397: return data.openStream(userAgent, null);
398: }
399:
400: /**
401: * Attempt to open the stream checking for common compression
402: * types, and automatically decompressing them if found.
403: * @param mimeType The expected mime type of the content
404: * in the returned InputStream (mapped to Http accept
405: * header among other possabilities).
406: */
407: public InputStream openStream(String mimeType) throws IOException {
408: List mt = new ArrayList(1);
409: mt.add(mimeType);
410: return data.openStream(userAgent, mt.iterator());
411: }
412:
413: /**
414: * Attempt to open the stream checking for common compression
415: * types, and automatically decompressing them if found.
416: * @param mimeTypes The expected mime types of the content
417: * in the returned InputStream (mapped to Http accept
418: * header among other possabilities).
419: */
420: public InputStream openStream(String[] mimeTypes)
421: throws IOException {
422: List mt = new ArrayList(mimeTypes.length);
423: for (int i = 0; i < mimeTypes.length; i++)
424: mt.add(mimeTypes[i]);
425: return data.openStream(userAgent, mt.iterator());
426: }
427:
428: /**
429: * Attempt to open the stream checking for common compression
430: * types, and automatically decompressing them if found.
431: * @param mimeTypes The expected mime types of the content
432: * in the returned InputStream (mapped to Http accept
433: * header among other possabilities). The elements of
434: * the iterator must be strings.
435: */
436: public InputStream openStream(Iterator mimeTypes)
437: throws IOException {
438: return data.openStream(userAgent, mimeTypes);
439: }
440:
441: /**
442: * Attempt to open the stream, does no checking for compression
443: * types.
444: */
445: public InputStream openStreamRaw() throws IOException {
446: return data.openStreamRaw(userAgent, null);
447: }
448:
449: /**
450: * Attempt to open the stream, does no checking for compression
451: * types.
452: * @param mimeType The expected mime type of the content
453: * in the returned InputStream (mapped to Http accept
454: * header among other possabilities).
455: */
456: public InputStream openStreamRaw(String mimeType)
457: throws IOException {
458: List mt = new ArrayList(1);
459: mt.add(mimeType);
460: return data.openStreamRaw(userAgent, mt.iterator());
461: }
462:
463: /**
464: * Attempt to open the stream, does no checking for comression
465: * types.
466: * @param mimeTypes The expected mime types of the content
467: * in the returned InputStream (mapped to Http accept
468: * header among other possabilities).
469: */
470: public InputStream openStreamRaw(String[] mimeTypes)
471: throws IOException {
472: List mt = new ArrayList(mimeTypes.length);
473: for (int i = 0; i < mimeTypes.length; i++)
474: mt.add(mimeTypes[i]);
475: return data.openStreamRaw(userAgent, mt.iterator());
476: }
477:
478: /**
479: * Attempt to open the stream, does no checking for comression
480: * types.
481: * @param mimeTypes The expected mime types of the content
482: * in the returned InputStream (mapped to Http accept
483: * header among other possabilities). The elements of
484: * the iterator must be strings.
485: */
486: public InputStream openStreamRaw(Iterator mimeTypes)
487: throws IOException {
488: return data.openStreamRaw(userAgent, mimeTypes);
489: }
490:
491: public boolean sameFile(ParsedURL other) {
492: return data.sameFile(other.data);
493: }
494:
495: /**
496: * Parse out the protocol from a url string. Used internally to
497: * select the proper handler, all other parsing is done by
498: * the selected protocol handler.
499: */
500: protected static String getProtocol(String urlStr) {
501: if (urlStr == null)
502: return null;
503: int idx = 0, len = urlStr.length();
504:
505: if (len == 0)
506: return null;
507:
508: // Protocol is only allowed to include -+.a-zA-Z
509: // So as soon as we hit something else we know we
510: // are done (if it is a ':' then we have protocol otherwise
511: // we don't.
512: char ch = urlStr.charAt(idx);
513: while ((ch == '-') || (ch == '+') || (ch == '.')
514: || ((ch >= 'a') && (ch <= 'z'))
515: || ((ch >= 'A') && (ch <= 'Z'))) {
516: idx++;
517: if (idx == len) {
518: ch = 0;
519: break;
520: }
521: ch = urlStr.charAt(idx);
522: }
523: if (ch == ':') {
524: // Has a protocol spec...
525: return urlStr.substring(0, idx).toLowerCase();
526: }
527: return null;
528: }
529:
530: /**
531: * Factory method to construct an appropriate subclass of ParsedURLData
532: * @param urlStr the string to parse.
533: */
534: public static ParsedURLData parseURL(String urlStr) {
535: ParsedURLProtocolHandler handler = getHandler(getProtocol(urlStr));
536: return handler.parseURL(urlStr);
537: }
538:
539: /**
540: * Factory method to construct an appropriate subclass of ParsedURLData,
541: * for a sub url.
542: * @param baseStr The base URL string to parse.
543: * @param urlStr the sub URL string to parse.
544: */
545: public static ParsedURLData parseURL(String baseStr, String urlStr) {
546: if (baseStr == null)
547: return parseURL(urlStr);
548:
549: ParsedURL purl = new ParsedURL(baseStr);
550: return parseURL(purl, urlStr);
551: }
552:
553: /**
554: * Factory method to construct an appropriate subclass of ParsedURLData,
555: * for a sub url.
556: * @param baseURL The base ParsedURL to parse.
557: * @param urlStr the sub URL string to parse.
558: */
559: public static ParsedURLData parseURL(ParsedURL baseURL,
560: String urlStr) {
561: if (baseURL == null)
562: return parseURL(urlStr);
563:
564: String protocol = getProtocol(urlStr);
565: if (protocol == null)
566: protocol = baseURL.getProtocol();
567: ParsedURLProtocolHandler handler = getHandler(protocol);
568: return handler.parseURL(baseURL, urlStr);
569: }
570: }
|