001: /*
002:
003: Licensed to the Apache Software Foundation (ASF) under one or more
004: contributor license agreements. See the NOTICE file distributed with
005: this work for additional information regarding copyright ownership.
006: The ASF licenses this file to You under the Apache License, Version 2.0
007: (the "License"); you may not use this file except in compliance with
008: the License. You may obtain a copy of the License at
009:
010: http://www.apache.org/licenses/LICENSE-2.0
011:
012: Unless required by applicable law or agreed to in writing, software
013: distributed under the License is distributed on an "AS IS" BASIS,
014: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: See the License for the specific language governing permissions and
016: limitations under the License.
017:
018: */
019: package org.apache.batik.util;
020:
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.net.URL;
024: import java.util.ArrayList;
025: import java.util.HashMap;
026: import java.util.Iterator;
027: import java.util.List;
028: import java.util.Map;
029:
030: import org.apache.batik.Version;
031:
032: /**
033: * This class is used as a replacement for java.net.URL. This is done
034: * for several reasons. First unlike java.net.URL this class will
035: * accept and parse as much of a URL as possible, without throwing a
036: * MalformedURL exception. This makes it extreamly useful for simply
037: * parsing a URL string (hence it's name).
038: *
039: * Second it allows for extension of the protocols supported by the
040: * URL parser. Batik uses this to support the 'Data' protocol.
041: *
042: * Third by default it checks the streams that it opens to see if they
043: * are GZIP compressed, if so it automatically uncompresses them
044: * (avoiding opening the stream twice in the processes).
045: *
046: * It is worth noting that most real work is defered to the
047: * ParsedURLData class to which most methods are forwarded. This is
048: * done because it allows a constructor interface to ParsedURL (mostly
049: * for compatability with core URL), in spite of the fact that the
050: * real implemenation uses the protocol handlers as factories for
051: * protocol specific instances of the ParsedURLData class.
052: *
053: * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
054: * @version $Id: ParsedURL.java 482928 2006-12-06 06:57:25Z cam $
055: */
056: public class ParsedURL {
057:
058: /**
059: * The data class we defer most things to.
060: */
061: ParsedURLData data;
062:
063: /**
064: * The user agent to associate with this URL
065: */
066: String userAgent;
067:
068: /**
069: * This maps between protocol names and ParsedURLProtocolHandler instances.
070: */
071: private static Map handlersMap = null;
072:
073: /**
074: * The default protocol handler. This handler is used when
075: * other handlers fail or no match for a protocol can be
076: * found.
077: */
078: private static ParsedURLProtocolHandler defaultHandler = new ParsedURLDefaultProtocolHandler();
079:
080: private static String globalUserAgent = "Batik/"
081: + Version.getVersion();
082:
083: public static String getGlobalUserAgent() {
084: return globalUserAgent;
085: }
086:
087: public static void setGlobalUserAgent(String userAgent) {
088: globalUserAgent = userAgent;
089: }
090:
091: /**
092: * Returns the shared instance of HandlersMap. This method is
093: * also responsible for initializing the handler map if this is
094: * the first time it has been requested since the class was
095: * loaded.
096: */
097: private static synchronized Map getHandlersMap() {
098: if (handlersMap != null)
099: return handlersMap;
100:
101: handlersMap = new HashMap();
102: registerHandler(new ParsedURLDataProtocolHandler());
103: registerHandler(new ParsedURLJarProtocolHandler());
104:
105: Iterator iter = Service
106: .providers(ParsedURLProtocolHandler.class);
107: while (iter.hasNext()) {
108: ParsedURLProtocolHandler handler;
109: handler = (ParsedURLProtocolHandler) iter.next();
110:
111: // System.out.println("Handler: " + handler);
112: registerHandler(handler);
113: }
114:
115: return handlersMap;
116:
117: }
118:
119: /**
120: * Returns the handler for a particular protocol. If protocol is
121: * <tt>null</tt> or no match is found in the handlers map it
122: * returns the default protocol handler.
123: * @param protocol The protocol to get a handler for.
124: */
125: public static synchronized ParsedURLProtocolHandler getHandler(
126: String protocol) {
127: if (protocol == null)
128: return defaultHandler;
129:
130: Map handlers = getHandlersMap();
131: ParsedURLProtocolHandler ret;
132: ret = (ParsedURLProtocolHandler) handlers.get(protocol);
133: if (ret == null)
134: ret = defaultHandler;
135: return ret;
136: }
137:
138: /**
139: * Registers a Protocol handler by adding it to the handlers map.
140: * If the given protocol handler returns <tt>null</tt> as it's
141: * supported protocol then it is registered as the default
142: * protocol handler.
143: * @param handler the new Protocol Handler to register
144: */
145: public static synchronized void registerHandler(
146: ParsedURLProtocolHandler handler) {
147: if (handler.getProtocolHandled() == null) {
148: defaultHandler = handler;
149: return;
150: }
151:
152: Map handlers = getHandlersMap();
153: handlers.put(handler.getProtocolHandled(), handler);
154: }
155:
156: /**
157: * This is a utility function others can call that checks if
158: * is is a GZIP stream if so it returns a GZIPInputStream that
159: * will decode the contents, otherwise it returns (or a
160: * buffered version of is) untouched.
161: * @param is Stream that may potentially be a GZIP stream.
162: */
163: public static InputStream checkGZIP(InputStream is)
164: throws IOException {
165: return ParsedURLData.checkGZIP(is);
166: }
167:
168: /**
169: * Construct a ParsedURL from the given url string.
170: * @param urlStr The string to try and parse as a URL
171: */
172: public ParsedURL(String urlStr) {
173: userAgent = getGlobalUserAgent();
174: data = parseURL(urlStr);
175: }
176:
177: /**
178: * Construct a ParsedURL from the given java.net.URL instance.
179: * This is useful if you already have a valid java.net.URL
180: * instance. This bypasses most of the parsing and hence is
181: * quicker and less prone to reinterpretation than converting the
182: * URL to a string before construction.
183: *
184: * @param url The URL to "mimic".
185: */
186: public ParsedURL(URL url) {
187: userAgent = getGlobalUserAgent();
188: data = new ParsedURLData(url);
189: }
190:
191: /**
192: * Construct a sub URL from two strings.
193: * @param baseStr The 'parent' URL. Should be complete.
194: * @param urlStr The 'sub' URL may be complete or partial.
195: * the missing pieces will be taken from the baseStr.
196: */
197: public ParsedURL(String baseStr, String urlStr) {
198: userAgent = getGlobalUserAgent();
199: if (baseStr != null)
200: data = parseURL(baseStr, urlStr);
201: else
202: data = parseURL(urlStr);
203: }
204:
205: /**
206: * Construct a sub URL from a base URL and a string for the sub url.
207: * @param baseURL The 'parent' URL.
208: * @param urlStr The 'sub' URL may be complete or partial.
209: * the missing pieces will be taken from the baseURL.
210: */
211: public ParsedURL(URL baseURL, String urlStr) {
212: userAgent = getGlobalUserAgent();
213:
214: if (baseURL != null)
215: data = parseURL(new ParsedURL(baseURL), urlStr);
216: else
217: data = parseURL(urlStr);
218: }
219:
220: /**
221: * Construct a sub URL from a base ParsedURL and a string for the sub url.
222: * @param baseURL The 'parent' URL.
223: * @param urlStr The 'sub' URL may be complete or partial.
224: * the missing pieces will be taken from the baseURL.
225: */
226: public ParsedURL(ParsedURL baseURL, String urlStr) {
227: userAgent = baseURL.getUserAgent();
228: if (baseURL != null)
229: data = parseURL(baseURL, urlStr);
230: else
231: data = parseURL(urlStr);
232: }
233:
234: /**
235: * Return a string rep of the URL (can be passed back into the
236: * constructor if desired).
237: */
238: public String toString() {
239: return data.toString();
240: }
241:
242: /**
243: * Implement Object.equals.
244: * Relies heavily on the contained ParsedURLData's implementation
245: * of equals.
246: */
247: public boolean equals(Object obj) {
248: if (obj == null)
249: return false;
250: if (!(obj instanceof ParsedURL))
251: return false;
252: ParsedURL purl = (ParsedURL) obj;
253: return data.equals(purl.data);
254: }
255:
256: /**
257: * Implement Object.hashCode.
258: * Relies on the contained ParsedURLData's implementation
259: * of hashCode.
260: */
261: public int hashCode() {
262: return data.hashCode();
263: }
264:
265: /**
266: * Returns true if the URL looks well formed and complete.
267: * This does not garuntee that the stream can be opened but
268: * is a good indication that things aren't totally messed up.
269: */
270: public boolean complete() {
271: return data.complete();
272: }
273:
274: /**
275: * Return the user agent current associated with this url (or
276: * null if none).
277: */
278: public String getUserAgent() {
279: return userAgent;
280: }
281:
282: /**
283: * Sets the user agent associated with this url (null clears
284: * any associated user agent).
285: */
286: public void setUserAgent(String userAgent) {
287: this .userAgent = userAgent;
288: }
289:
290: /**
291: * Returns the protocol for this URL.
292: * The protocol is everything upto the first ':'.
293: */
294: public String getProtocol() {
295: if (data.protocol == null)
296: return null;
297: return data.protocol;
298: }
299:
300: /**
301: * Returns the host for this URL, if any, <tt>null</tt> if there isn't
302: * one or it doesn't make sense for the protocol.
303: */
304: public String getHost() {
305: if (data.host == null)
306: return null;
307: return data.host;
308: }
309:
310: /**
311: * Returns the port on the host to connect to, if it was specified
312: * in the url that was parsed, otherwise returns -1.
313: */
314: public int getPort() {
315: return data.port;
316: }
317:
318: /**
319: * Returns the path for this URL, if any (where appropriate for
320: * the protocol this also includes the file, not just directory).
321: * Note that getPath appears in JDK 1.3 as a synonym for getFile
322: * from JDK 1.2.
323: */
324: public String getPath() {
325: if (data.path == null)
326: return null;
327: return data.path;
328: }
329:
330: /**
331: * Returns the 'fragment' reference in the URL.
332: */
333: public String getRef() {
334: if (data.ref == null)
335: return null;
336: return data.ref;
337: }
338:
339: /**
340: * Returns the URL up to and include the port number on
341: * the host. Does not include the path or fragment pieces.
342: */
343: public String getPortStr() {
344: return data.getPortStr();
345: }
346:
347: /**
348: * Returns the content type if available. This is only available
349: * for some protocols.
350: */
351: public String getContentType() {
352: return data.getContentType(userAgent);
353: }
354:
355: /**
356: * Returns the content type's type/subtype, if available. This is
357: * only available for some protocols.
358: */
359: public String getContentTypeMediaType() {
360: return data.getContentTypeMediaType(userAgent);
361: }
362:
363: /**
364: * Returns the content type's charset parameter, if available. This is
365: * only available for some protocols.
366: */
367: public String getContentTypeCharset() {
368: return data.getContentTypeCharset(userAgent);
369: }
370:
371: /**
372: * Returns whether the Content-Type header has the given parameter.
373: */
374: public boolean hasContentTypeParameter(String param) {
375: return data.hasContentTypeParameter(userAgent, param);
376: }
377:
378: /**
379: * Returns the content encoding if available. This is only available
380: * for some protocols.
381: */
382: public String getContentEncoding() {
383: return data.getContentEncoding(userAgent);
384: }
385:
386: /**
387: * Attempt to open the stream checking for common compression
388: * types, and automatically decompressing them if found.
389: */
390: public InputStream openStream() throws IOException {
391: return data.openStream(userAgent, null);
392: }
393:
394: /**
395: * Attempt to open the stream checking for common compression
396: * types, and automatically decompressing them if found.
397: * @param mimeType The expected mime type of the content
398: * in the returned InputStream (mapped to Http accept
399: * header among other possabilities).
400: */
401: public InputStream openStream(String mimeType) throws IOException {
402: List mt = new ArrayList(1);
403: mt.add(mimeType);
404: return data.openStream(userAgent, mt.iterator());
405: }
406:
407: /**
408: * Attempt to open the stream checking for common compression
409: * types, and automatically decompressing them if found.
410: * @param mimeTypes The expected mime types of the content
411: * in the returned InputStream (mapped to Http accept
412: * header among other possabilities).
413: */
414: public InputStream openStream(String[] mimeTypes)
415: throws IOException {
416: List mt = new ArrayList(mimeTypes.length);
417: for (int i = 0; i < mimeTypes.length; i++)
418: mt.add(mimeTypes[i]);
419: return data.openStream(userAgent, mt.iterator());
420: }
421:
422: /**
423: * Attempt to open the stream checking for common compression
424: * types, and automatically decompressing them if found.
425: * @param mimeTypes The expected mime types of the content
426: * in the returned InputStream (mapped to Http accept
427: * header among other possabilities). The elements of
428: * the iterator must be strings.
429: */
430: public InputStream openStream(Iterator mimeTypes)
431: throws IOException {
432: return data.openStream(userAgent, mimeTypes);
433: }
434:
435: /**
436: * Attempt to open the stream, does no checking for compression
437: * types.
438: */
439: public InputStream openStreamRaw() throws IOException {
440: return data.openStreamRaw(userAgent, null);
441: }
442:
443: /**
444: * Attempt to open the stream, does no checking for compression
445: * types.
446: * @param mimeType The expected mime type of the content
447: * in the returned InputStream (mapped to Http accept
448: * header among other possabilities).
449: */
450: public InputStream openStreamRaw(String mimeType)
451: throws IOException {
452: List mt = new ArrayList(1);
453: mt.add(mimeType);
454: return data.openStreamRaw(userAgent, mt.iterator());
455: }
456:
457: /**
458: * Attempt to open the stream, does no checking for comression
459: * types.
460: * @param mimeTypes The expected mime types of the content
461: * in the returned InputStream (mapped to Http accept
462: * header among other possabilities).
463: */
464: public InputStream openStreamRaw(String[] mimeTypes)
465: throws IOException {
466: List mt = new ArrayList(mimeTypes.length);
467: for (int i = 0; i < mimeTypes.length; i++)
468: mt.add(mimeTypes[i]);
469: return data.openStreamRaw(userAgent, mt.iterator());
470: }
471:
472: /**
473: * Attempt to open the stream, does no checking for comression
474: * types.
475: * @param mimeTypes The expected mime types of the content
476: * in the returned InputStream (mapped to Http accept
477: * header among other possabilities). The elements of
478: * the iterator must be strings.
479: */
480: public InputStream openStreamRaw(Iterator mimeTypes)
481: throws IOException {
482: return data.openStreamRaw(userAgent, mimeTypes);
483: }
484:
485: public boolean sameFile(ParsedURL other) {
486: return data.sameFile(other.data);
487: }
488:
489: /**
490: * Parse out the protocol from a url string. Used internally to
491: * select the proper handler, all other parsing is done by
492: * the selected protocol handler.
493: */
494: protected static String getProtocol(String urlStr) {
495: if (urlStr == null)
496: return null;
497: int idx = 0, len = urlStr.length();
498:
499: if (len == 0)
500: return null;
501:
502: // Protocol is only allowed to include -+.a-zA-Z
503: // So as soon as we hit something else we know we
504: // are done (if it is a ':' then we have protocol otherwise
505: // we don't.
506: char ch = urlStr.charAt(idx);
507: while ((ch == '-')
508: || // todo this might be more efficient with a long mask
509: (ch == '+')
510: || // which has a bit set for each valid char.
511: (ch == '.')
512: || // check feasability
513: ((ch >= 'a') && (ch <= 'z'))
514: || ((ch >= 'A') && (ch <= 'Z'))) {
515: idx++;
516: if (idx == len) {
517: ch = 0;
518: break;
519: }
520: ch = urlStr.charAt(idx);
521: }
522: if (ch == ':') {
523: // Has a protocol spec...
524: return urlStr.substring(0, idx).toLowerCase();
525: }
526: return null;
527: }
528:
529: /**
530: * Factory method to construct an appropriate subclass of ParsedURLData
531: * @param urlStr the string to parse.
532: */
533: public static ParsedURLData parseURL(String urlStr) {
534: ParsedURLProtocolHandler handler = getHandler(getProtocol(urlStr));
535: return handler.parseURL(urlStr);
536: }
537:
538: /**
539: * Factory method to construct an appropriate subclass of ParsedURLData,
540: * for a sub url.
541: * @param baseStr The base URL string to parse.
542: * @param urlStr the sub URL string to parse.
543: */
544: public static ParsedURLData parseURL(String baseStr, String urlStr) {
545: if (baseStr == null)
546: return parseURL(urlStr);
547:
548: ParsedURL purl = new ParsedURL(baseStr);
549: return parseURL(purl, urlStr);
550: }
551:
552: /**
553: * Factory method to construct an appropriate subclass of ParsedURLData,
554: * for a sub url.
555: * @param baseURL The base ParsedURL to parse.
556: * @param urlStr the sub URL string to parse.
557: */
558: public static ParsedURLData parseURL(ParsedURL baseURL,
559: String urlStr) {
560: if (baseURL == null)
561: return parseURL(urlStr);
562:
563: String protocol = getProtocol(urlStr);
564: if (protocol == null)
565: protocol = baseURL.getProtocol();
566: ParsedURLProtocolHandler handler = getHandler(protocol);
567: return handler.parseURL(baseURL, urlStr);
568: }
569: }
|