001: /*
002:
003: ============================================================================
004: The Apache Software License, Version 1.1
005: ============================================================================
006:
007: Copyright (C) 1999-2003 The Apache Software Foundation. All rights reserved.
008:
009: Redistribution and use in source and binary forms, with or without modifica-
010: tion, are permitted provided that the following conditions are met:
011:
012: 1. Redistributions of source code must retain the above copyright notice,
013: this list of conditions and the following disclaimer.
014:
015: 2. Redistributions in binary form must reproduce the above copyright notice,
016: this list of conditions and the following disclaimer in the documentation
017: and/or other materials provided with the distribution.
018:
019: 3. The end-user documentation included with the redistribution, if any, must
020: include the following acknowledgment: "This product includes software
021: developed by the Apache Software Foundation (http://www.apache.org/)."
022: Alternately, this acknowledgment may appear in the software itself, if
023: and wherever such third-party acknowledgments normally appear.
024:
025: 4. The names "Batik" and "Apache Software Foundation" must not be
026: used to endorse or promote products derived from this software without
027: prior written permission. For written permission, please contact
028: apache@apache.org.
029:
030: 5. Products derived from this software may not be called "Apache", nor may
031: "Apache" appear in their name, without prior written permission of the
032: Apache Software Foundation.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
035: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
036: FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
037: APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
038: INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU-
039: DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
040: OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
041: ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
042: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
043: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044:
045: This software consists of voluntary contributions made by many individuals
046: on behalf of the Apache Software Foundation. For more information on the
047: Apache Software Foundation, please see <http://www.apache.org/>.
048:
049: */
050:
051: package org.apache.batik.util;
052:
053: import java.net.MalformedURLException;
054: import java.net.URL;
055:
056: /**
057: * The default protocol handler this handles the most common
058: * protocols, such as 'file' 'http' 'ftp'.
059: * The parsing should be general enought to support most
060: * 'normal' URL formats, so in many cases
061: *
062: * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
063: * @version $Id$
064: */
065: public class ParsedURLDefaultProtocolHandler extends
066: AbstractParsedURLProtocolHandler {
067:
068: /**
069: * Default constructor sets no protocol so this becomes
070: * default handler.
071: */
072: public ParsedURLDefaultProtocolHandler() {
073: super (null);
074: }
075:
076: /**
077: * Subclass constructor allows subclasses to provide protocol,
078: * to be handled.
079: */
080: protected ParsedURLDefaultProtocolHandler(String protocol) {
081: super (protocol);
082: }
083:
084: /**
085: * Subclasses can override these method to construct alternate
086: * subclasses of ParsedURLData.
087: */
088: protected ParsedURLData constructParsedURLData() {
089: return new ParsedURLData();
090: }
091:
092: /**
093: * Subclasses can override these method to construct alternate
094: * subclasses of ParsedURLData.
095: * @param the java.net.URL class we reference.
096: */
097: protected ParsedURLData constructParsedURLData(URL url) {
098: return new ParsedURLData(url);
099: }
100:
101: /**
102: * Parses the string and returns the results of parsing in the
103: * ParsedURLData object.
104: * @param urlStr the string to parse as a URL.
105: */
106: public ParsedURLData parseURL(String urlStr) {
107: try {
108: URL url = new URL(urlStr);
109: // System.err.println("System Parse: " + urlStr);
110: return constructParsedURLData(url);
111: } catch (MalformedURLException mue) {
112: // Built in URL wouldn't take it...
113: // mue.printStackTrace();
114: }
115:
116: // new Exception("Custom Parse: " + urlStr).printStackTrace();
117: // System.err.println("Custom Parse: " + urlStr);
118:
119: ParsedURLData ret = constructParsedURLData();
120:
121: if (urlStr == null)
122: return ret;
123:
124: int pidx = 0, idx;
125: int len = urlStr.length();
126:
127: // Pull fragement id off first...
128: idx = urlStr.indexOf('#');
129: ret.ref = null;
130: if (idx != -1) {
131: if (idx + 1 < len)
132: ret.ref = urlStr.substring(idx + 1);
133: urlStr = urlStr.substring(0, idx);
134: len = urlStr.length();
135: }
136:
137: if (len == 0)
138: return ret;
139:
140: // Protocol is only allowed to include -+.a-zA-Z
141: // So as soon as we hit something else we know we
142: // are done (if it is a ':' then we have protocol otherwise
143: // we don't.
144: idx = 0;
145: char ch = urlStr.charAt(idx);
146: while ((ch == '-') || (ch == '+') || (ch == '.')
147: || ((ch >= 'a') && (ch <= 'z'))
148: || ((ch >= 'A') && (ch <= 'Z'))) {
149: idx++;
150: if (idx == len) {
151: ch = 0;
152: break;
153: }
154: ch = urlStr.charAt(idx);
155: }
156:
157: if (ch == ':') {
158: // Has a protocol spec...
159: ret.protocol = urlStr.substring(pidx, idx).toLowerCase();
160: pidx = idx + 1; // Skip ':'
161: }
162:
163: // See if we have host/port spec.
164: idx = urlStr.indexOf('/');
165: if ((idx == -1)
166: || ((pidx + 2 < len) && (urlStr.charAt(pidx) == '/') && (urlStr
167: .charAt(pidx + 1) == '/'))) {
168: // No slashes (apache.org) or a double slash
169: // (//apache.org/....) so
170: // we should have host[:port] before next slash.
171: if (idx != -1)
172: pidx += 2; // Skip double slash...
173:
174: idx = urlStr.indexOf('/', pidx); // find end of host:Port spec
175: String hostPort;
176: if (idx == -1)
177: // Just host and port nothing following...
178: hostPort = urlStr.substring(pidx);
179: else
180: // Path spec follows...
181: hostPort = urlStr.substring(pidx, idx);
182:
183: int hidx = idx; // Remember location of '/'
184:
185: // pull apart host and port number...
186: idx = hostPort.indexOf(':');
187: ret.port = -1;
188: if (idx == -1) {
189: // Just Host...
190: if (hostPort.length() == 0)
191: ret.host = null;
192: else
193: ret.host = hostPort;
194: } else {
195: // Host and port
196: if (idx == 0)
197: ret.host = null;
198: else
199: ret.host = hostPort.substring(0, idx);
200:
201: if (idx + 1 < hostPort.length()) {
202: String portStr = hostPort.substring(idx + 1);
203: try {
204: ret.port = Integer.parseInt(portStr);
205: } catch (NumberFormatException nfe) {
206: // bad port leave as '-1'
207: }
208: }
209: }
210: if (((ret.host == null) || (ret.host.indexOf('.') == -1))
211: && (ret.port == -1))
212: // no '.' in a host spec??? and no port, probably
213: // just a path.
214: ret.host = null;
215: else
216: pidx = hidx;
217: }
218:
219: if ((pidx == -1) || (pidx >= len))
220: return ret; // Nothing follows
221:
222: ret.path = urlStr.substring(pidx);
223: return ret;
224: }
225:
226: public static String unescapeStr(String str) {
227: int idx = str.indexOf('%');
228: if (idx == -1)
229: return str; // quick out..
230:
231: int prev = 0;
232: StringBuffer ret = new StringBuffer();
233: while (idx != -1) {
234: if (idx != prev)
235: ret.append(str.substring(prev, idx));
236:
237: if (idx + 2 >= str.length())
238: break;
239: prev = idx + 3;
240: idx = str.indexOf('%', prev);
241:
242: int ch1 = charToHex(str.charAt(idx + 1));
243: int ch2 = charToHex(str.charAt(idx + 1));
244: if ((ch1 == -1) || (ch2 == -1))
245: continue;
246: ret.append((char) (ch1 << 4 | ch2));
247: }
248:
249: return ret.toString();
250: }
251:
252: public static int charToHex(int ch) {
253: switch (ch) {
254: case '0':
255: case '1':
256: case '2':
257: case '3':
258: case '4':
259: case '5':
260: case '6':
261: case '7':
262: case '8':
263: case '9':
264: return ch - '0';
265: case 'a':
266: case 'A':
267: return 10;
268: case 'b':
269: case 'B':
270: return 11;
271: case 'c':
272: case 'C':
273: return 12;
274: case 'd':
275: case 'D':
276: return 13;
277: case 'e':
278: case 'E':
279: return 14;
280: case 'f':
281: case 'F':
282: return 15;
283: default:
284: return -1;
285: }
286: }
287:
288: /**
289: * Parses the string as a sub URL of baseURL, and returns the
290: * results of parsing in the ParsedURLData object.
291: * @param baseURL the base url for parsing.
292: * @param urlStr the string to parse as a URL.
293: */
294: public ParsedURLData parseURL(ParsedURL baseURL, String urlStr) {
295: // Reference to same document (including fragment, and query).
296: if (urlStr.length() == 0)
297: return baseURL.data;
298:
299: // System.err.println("Base: " + baseURL + "\n" +
300: // "Sub: " + urlStr);
301:
302: int idx = 0, len = urlStr.length();
303: if (len == 0)
304: return baseURL.data;
305:
306: // Protocol is only allowed to include -+.a-zA-Z
307: // So as soon as we hit something else we know we
308: // are done (if it is a ':' then we have protocol otherwise
309: // we don't.
310: char ch = urlStr.charAt(idx);
311: while ((ch == '-') || (ch == '+') || (ch == '.')
312: || ((ch >= 'a') && (ch <= 'z'))
313: || ((ch >= 'A') && (ch <= 'Z'))) {
314: idx++;
315: if (idx == len) {
316: ch = 0;
317: break;
318: }
319: ch = urlStr.charAt(idx);
320: }
321: String protocol = null;
322: if (ch == ':') {
323: // Has a protocol spec...
324: protocol = urlStr.substring(0, idx).toLowerCase();
325: }
326:
327: if (protocol != null) {
328: // Temporary if we have a protocol then assume absolute
329: // URL. Technically this is the correct handling but much
330: // software supports relative URLs with a protocol that
331: // matches the base URL's protocol.
332: // if (true)
333: // return parseURL(urlStr);
334: if (!protocol.equals(baseURL.getProtocol()))
335: // Different protocols, assume absolute URL ignore base...
336: return parseURL(urlStr);
337:
338: // Same protocols, if char after ':' is a '/' then it's
339: // still absolute...
340: idx++;
341: if (idx == urlStr.length())
342: // Just a Protocol???
343: return parseURL(urlStr);
344:
345: if (urlStr.charAt(idx) == '/')
346: // Absolute URL...
347: return parseURL(urlStr);
348:
349: // Still relative just drop the protocol (we will pick it
350: // back up from the baseURL later...).
351: urlStr = urlStr.substring(idx);
352: }
353:
354: if (urlStr.startsWith("/")) {
355: if ((urlStr.length() > 1) && (urlStr.charAt(1) == '/')) {
356: // Relative but only uses protocol from base
357: return parseURL(baseURL.getProtocol() + ":" + urlStr);
358: }
359: // Relative 'absolute' path, uses protocol and authority
360: // (host) from base
361: return parseURL(baseURL.getPortStr() + urlStr);
362: }
363:
364: if (urlStr.startsWith("#")) {
365: String base = baseURL.getPortStr();
366: if (baseURL.getPath() != null)
367: base += baseURL.getPath();
368: return parseURL(base + urlStr);
369: }
370:
371: String path = baseURL.getPath();
372: // No path? well we will treat this as being relative to it's self.
373: if (path == null)
374: path = "";
375: idx = path.lastIndexOf('/');
376: if (idx == -1)
377: // baseURL is just a filename (in current dir) so use current dir
378: // as base of new URL.
379: path = "";
380: else
381: path = path.substring(0, idx + 1);
382:
383: // System.err.println("Base Path: " + path);
384: // System.err.println("Base PortStr: " + baseURL.getPortStr());
385: return parseURL(baseURL.getPortStr() + path + urlStr);
386: }
387: }
|