001: /*
002: * Copyright 1999,2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.catalina.util;
018:
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.Locale;
021:
022: /**
023: * <p><strong>URL</strong> is designed to provide public APIs for parsing
024: * and synthesizing Uniform Resource Locators as similar as possible to the
025: * APIs of <code>java.net.URL</code>, but without the ability to open a
026: * stream or connection. One of the consequences of this is that you can
027: * construct URLs for protocols for which a URLStreamHandler is not
028: * available (such as an "https" URL when JSSE is not installed).</p>
029: *
 * <p><strong>WARNING</strong> - This class assumes that the string
 * representation of a URL (the <code>spec</code> argument) conforms to the
 * generic syntax described in RFC 2396 "Uniform Resource Identifiers:
 * Generic Syntax":
 * <pre>
 *   &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;#&lt;fragment&gt;
 * </pre></p>
036: *
037: * <p><strong>FIXME</strong> - This class really ought to end up in a Commons
038: * package someplace.</p>
039: *
040: * @author Craig R. McClanahan
041: * @version $Revision: 1.2 $ $Date: 2004/02/27 14:58:51 $
042: */
043:
public final class URL implements Serializable {

    // NOTE(review): no serialVersionUID is declared, so the serialized form
    // is tied to the compiler-computed default, which changes whenever a
    // non-private member is added or removed.

    // ----------------------------------------------------------- Constructors

    /**
     * Create a URL object from the specified String representation.
     *
     * @param spec String representation of the URL
     *
     * @exception MalformedURLException if the string representation
     *  cannot be parsed successfully
     */
    public URL(String spec) throws MalformedURLException {

        this(null, spec);

    }

    /**
     * Create a URL object by parsing a string representation relative
     * to a specified context.  Based on logic from JDK 1.3.1's
     * <code>java.net.URL</code>.
     * <p>
     * Leading and trailing whitespace and a leading <code>url:</code> prefix
     * are ignored.  The context is used only when the spec has no scheme, or
     * has the same scheme as a context whose path starts with '/'.  When a
     * context is supplied, the resulting path is normalized.
     *
     * @param context URL against which the relative representation
     *  is resolved (may be <code>null</code>)
     * @param spec String representation of the URL (usually relative)
     *
     * @exception MalformedURLException if the string representation
     *  cannot be parsed successfully (including a <code>null</code> spec,
     *  or a spec without a scheme when no usable context is given)
     */
    public URL(URL context, String spec) throws MalformedURLException {

        String original = spec;
        int i, limit, c;
        int start = 0;
        String newProtocol = null;
        boolean aRef = false;

        try {

            // Eliminate leading and trailing whitespace
            limit = spec.length();
            while ((limit > 0) && (spec.charAt(limit - 1) <= ' ')) {
                limit--;
            }
            while ((start < limit) && (spec.charAt(start) <= ' ')) {
                start++;
            }

            // If the string representation starts with "url:", skip it
            if (spec.regionMatches(true, start, "url:", 0, 4)) {
                start += 4;
            }

            // Is this a ref relative to the context URL?
            if ((start < spec.length()) && (spec.charAt(start) == '#')) {
                aRef = true;
            }

            // Parse out the new protocol.  The scan stops at the first
            // '/', '?' or '#' so that a ':' appearing inside the path,
            // query or fragment of a relative reference is never mistaken
            // for the scheme delimiter.
            for (i = start; !aRef && (i < limit); i++) {
                c = spec.charAt(i);
                if ((c == '/') || (c == '?') || (c == '#')) {
                    break;
                }
                if (c == ':') {
                    // Assume all protocols are valid.  Lower-case with a
                    // fixed locale so the result does not depend on the
                    // JVM default (e.g. Turkish dotless i).
                    newProtocol = spec.substring(start, i)
                        .toLowerCase(Locale.ENGLISH);
                    start = i + 1;
                    break;
                }
            }

            // Only use our context if the protocols match
            protocol = newProtocol;
            if ((context != null)
                && ((newProtocol == null)
                    || newProtocol.equalsIgnoreCase(context.getProtocol()))) {
                // If the context is a hierarchical URL scheme and the spec
                // contains a matching scheme then maintain backwards
                // compatibility and treat it as if the spec didn't contain
                // the scheme; see 5.2.3 of RFC2396
                if ((context.getPath() != null)
                    && (context.getPath().startsWith("/"))) {
                    newProtocol = null;
                }
                if (newProtocol == null) {
                    protocol = context.getProtocol();
                    authority = context.getAuthority();
                    userInfo = context.getUserInfo();
                    host = context.getHost();
                    port = context.getPort();
                    file = context.getFile();
                    // The query starts at the first '?' of the file
                    int question = file.indexOf('?');
                    if (question < 0) {
                        path = file;
                    } else {
                        path = file.substring(0, question);
                    }
                }
            }

            if (protocol == null) {
                throw new MalformedURLException("no protocol: " + original);
            }

            // Parse out any ref portion of the spec
            i = spec.indexOf('#', start);
            if (i >= 0) {
                ref = spec.substring(i + 1, limit);
                limit = i;
            }

            // Parse the remainder of the spec in a protocol-specific fashion
            parse(spec, start, limit);
            if (context != null) {
                normalize();
            }

        } catch (MalformedURLException e) {
            throw e;
        } catch (Exception e) {
            // Same message as before, but keep the original failure
            // attached as the cause
            MalformedURLException mue =
                new MalformedURLException(e.toString());
            mue.initCause(e);
            throw mue;
        }

    }

    /**
     * Create a URL object from the specified components.  The default port
     * number for the specified protocol will be used.
     *
     * @param protocol Name of the protocol to use
     * @param host Name of the host addressed by this protocol
     * @param file Filename on the specified host
     *
     * @exception MalformedURLException is never thrown, but present for
     *  compatible APIs
     */
    public URL(String protocol, String host, String file)
        throws MalformedURLException {

        this(protocol, host, -1, file);

    }

    /**
     * Create a URL object from the specified components.  Specifying a port
     * number of -1 indicates that the URL should use the default port for
     * that protocol.  Based on logic from JDK 1.3.1's
     * <code>java.net.URL</code>.
     * <p>
     * Anything after the first '#' in <code>file</code> becomes the
     * reference; in what remains, anything after the first '?' becomes the
     * query and everything before it the path.
     *
     * @param protocol Name of the protocol to use
     * @param host Name of the host addressed by this protocol
     * @param port Port number, or -1 for the default port for this protocol
     * @param file Filename on the specified host (must not be
     *  <code>null</code>)
     *
     * @exception MalformedURLException is never thrown, but present for
     *  compatible APIs
     */
    public URL(String protocol, String host, int port, String file)
        throws MalformedURLException {

        this.protocol = protocol;
        this.host = host;
        this.port = port;

        int hash = file.indexOf('#');
        this.file = (hash < 0) ? file : file.substring(0, hash);
        this.ref = (hash < 0) ? null : file.substring(hash + 1);

        // Split path and query on the fragment-free file, so that the
        // fragment never leaks into the query or the path
        int question = this.file.indexOf('?');
        if (question >= 0) {
            query = this.file.substring(question + 1);
            path = this.file.substring(0, question);
        } else {
            path = this.file;
        }

        if ((host != null) && (host.length() > 0)) {
            authority = (port == -1) ? host : host + ":" + port;
        }

    }

    // ----------------------------------------------------- Instance Variables

    /**
     * The authority part of the URL.
     */
    private String authority = null;

    /**
     * The filename part of the URL (path plus optional "?query").
     */
    private String file = null;

    /**
     * The host name part of the URL.
     */
    private String host = null;

    /**
     * The path part of the URL.
     */
    private String path = null;

    /**
     * The port number part of the URL, or -1 if none was specified.
     */
    private int port = -1;

    /**
     * The protocol name part of the URL.
     */
    private String protocol = null;

    /**
     * The query part of the URL.
     */
    private String query = null;

    /**
     * The reference part of the URL.
     */
    private String ref = null;

    /**
     * The user info part of the URL.
     */
    private String userInfo = null;

    // --------------------------------------------------------- Public Methods

    /**
     * Compare two URLs for equality.  The result is <code>true</code> if and
     * only if the argument is not null, and is a <code>URL</code> object
     * that represents the same <code>URL</code> as this object.  Two
     * <code>URLs</code> are equal if they have the same protocol and
     * reference the same host, the same port number on the host,
     * and the same file and anchor on the host.
     *
     * @param obj The URL to compare against
     *
     * @return <code>true</code> if the two URLs are equal
     */
    public boolean equals(Object obj) {

        // instanceof is false for null, so no separate null check is needed
        if (!(obj instanceof URL)) {
            return (false);
        }
        URL other = (URL) obj;
        if (!sameFile(other)) {
            return (false);
        }
        return (compare(ref, other.getRef()));

    }

    /**
     * Return a hash code built from exactly the components examined by
     * {@link #equals(Object)}: protocol, host, port, file and reference.
     * The value changes if {@link #normalize()} changes the file.
     *
     * @return hash code for this URL
     */
    public int hashCode() {

        int result = 17;
        result = 31 * result + hash(protocol);
        result = 31 * result + hash(host);
        result = 31 * result + port;
        result = 31 * result + getFile().hashCode();
        result = 31 * result + hash(ref);
        return (result);

    }

    /**
     * Return the authority part of the URL.
     */
    public String getAuthority() {

        return (this.authority);

    }

    /**
     * Return the filename part of the URL.  <strong>NOTE</strong> - For
     * compatibility with <code>java.net.URL</code>, this value includes
     * the query string if there was one.  For just the path portion,
     * call <code>getPath()</code> instead.
     *
     * @return the file, or an empty string if there is none
     */
    public String getFile() {

        if (file == null) {
            return ("");
        }
        return (this.file);

    }

    /**
     * Return the host name part of the URL.
     */
    public String getHost() {

        return (this.host);

    }

    /**
     * Return the path part of the URL.
     *
     * @return the path, or an empty string if there is none
     */
    public String getPath() {

        if (this.path == null) {
            return ("");
        }
        return (this.path);

    }

    /**
     * Return the port number part of the URL, or -1 if none was specified.
     */
    public int getPort() {

        return (this.port);

    }

    /**
     * Return the protocol name part of the URL.
     */
    public String getProtocol() {

        return (this.protocol);

    }

    /**
     * Return the query part of the URL.
     */
    public String getQuery() {

        return (this.query);

    }

    /**
     * Return the reference part of the URL.
     */
    public String getRef() {

        return (this.ref);

    }

    /**
     * Return the user info part of the URL.
     */
    public String getUserInfo() {

        return (this.userInfo);

    }

    /**
     * Normalize the <code>path</code> (and therefore <code>file</code>)
     * portions of this URL: backslashes become slashes, a leading slash is
     * added if missing, and "//", "/./" and "/../" sequences are resolved.
     * <p>
     * <strong>NOTE</strong> - This method is not part of the public API
     * of <code>java.net.URL</code>, but is provided as a value added
     * service of this implementation.
     *
     * @exception MalformedURLException if a normalization error occurs,
     *  such as trying to move above the hierarchical root
     */
    public void normalize() throws MalformedURLException {

        // Special case for null path
        if (path == null) {
            file = (query != null) ? "?" + query : "";
            return;
        }

        // Create a place for the normalized path
        String normalized = path;
        if (normalized.equals("/.")) {
            path = "/";
            file = buildFile(path, query);
            return;
        }

        // Normalize the slashes and add leading slash if necessary
        if (normalized.indexOf('\\') >= 0) {
            normalized = normalized.replace('\\', '/');
        }
        if (!normalized.startsWith("/")) {
            normalized = "/" + normalized;
        }

        // Resolve occurrences of "//" in the normalized path
        while (true) {
            int index = normalized.indexOf("//");
            if (index < 0) {
                break;
            }
            normalized = normalized.substring(0, index)
                + normalized.substring(index + 1);
        }

        // Resolve occurrences of "/./" in the normalized path
        while (true) {
            int index = normalized.indexOf("/./");
            if (index < 0) {
                break;
            }
            normalized = normalized.substring(0, index)
                + normalized.substring(index + 2);
        }

        // Resolve occurrences of "/../" in the normalized path
        while (true) {
            int index = normalized.indexOf("/../");
            if (index < 0) {
                break;
            }
            if (index == 0) {
                throw new MalformedURLException(
                    "Invalid relative URL reference");
            }
            // Drop the segment preceding "/../" together with the "/.."
            int index2 = normalized.lastIndexOf('/', index - 1);
            normalized = normalized.substring(0, index2)
                + normalized.substring(index + 3);
        }

        // Resolve occurrences of "/." at the end of the normalized path
        if (normalized.endsWith("/.")) {
            normalized = normalized.substring(0, normalized.length() - 1);
        }

        // Resolve occurrences of "/.." at the end of the normalized path
        if (normalized.endsWith("/..")) {
            int index = normalized.length() - 3;
            int index2 = normalized.lastIndexOf('/', index - 1);
            if (index2 < 0) {
                throw new MalformedURLException(
                    "Invalid relative URL reference");
            }
            normalized = normalized.substring(0, index2 + 1);
        }

        // Store the normalized path that we have completed
        path = normalized;
        file = buildFile(path, query);

    }

    /**
     * Compare two URLs, excluding the "ref" fields.  Returns <code>true</code>
     * if this <code>URL</code> and the <code>other</code> argument both refer
     * to the same resource.  The two <code>URLs</code> might not both contain
     * the same anchor.
     *
     * @param other The URL to compare against
     *
     * @return <code>true</code> if protocol, host, port and file all match;
     *  <code>false</code> otherwise, including when <code>other</code> is
     *  <code>null</code>
     */
    public boolean sameFile(URL other) {

        if (other == null) {
            return (false);
        }
        if (!compare(protocol, other.getProtocol())) {
            return (false);
        }
        if (!compare(host, other.getHost())) {
            return (false);
        }
        if (port != other.getPort()) {
            return (false);
        }
        // Use the accessor on both sides: it maps a missing file to "",
        // so a URL without a file still compares equal to itself
        if (!compare(getFile(), other.getFile())) {
            return (false);
        }
        return (true);

    }

    /**
     * Return a string representation of this URL.  This follow the rules in
     * RFC 2396, Section 5.2, Step 7.
     */
    public String toExternalForm() {

        StringBuffer sb = new StringBuffer();
        if (protocol != null) {
            sb.append(protocol);
            sb.append(":");
        }
        if (authority != null) {
            sb.append("//");
            sb.append(authority);
        }
        if (path != null) {
            sb.append(path);
        }
        if (query != null) {
            sb.append('?');
            sb.append(query);
        }
        if (ref != null) {
            sb.append('#');
            sb.append(ref);
        }
        return (sb.toString());

    }

    /**
     * Return a string representation of this object, listing the
     * individual components (intended for debugging; use
     * {@link #toExternalForm()} for the URL text itself).
     */
    public String toString() {

        StringBuffer sb = new StringBuffer("URL[");
        sb.append("authority=");
        sb.append(authority);
        sb.append(", file=");
        sb.append(file);
        sb.append(", host=");
        sb.append(host);
        sb.append(", port=");
        sb.append(port);
        sb.append(", protocol=");
        sb.append(protocol);
        sb.append(", query=");
        sb.append(query);
        sb.append(", ref=");
        sb.append(ref);
        sb.append(", userInfo=");
        sb.append(userInfo);
        sb.append("]");
        return (sb.toString());

    }

    // -------------------------------------------------------- Private Methods

    /**
     * Compare two String values for equality, taking appropriate care if one
     * or both of the values are <code>null</code>.
     *
     * @param first First string
     * @param second Second string
     */
    private static boolean compare(String first, String second) {

        if (first == null) {
            return (second == null);
        }
        return (first.equals(second));

    }

    /**
     * Return the hash code of a possibly <code>null</code> string
     * (0 for <code>null</code>).
     *
     * @param value String to hash
     */
    private static int hash(String value) {

        return ((value == null) ? 0 : value.hashCode());

    }

    /**
     * Combine a path and an optional query into a "file" value.
     *
     * @param path Path portion (not <code>null</code>)
     * @param query Query portion, or <code>null</code> for none
     */
    private static String buildFile(String path, String query) {

        return ((query != null) ? path + "?" + query : path);

    }

    /**
     * Parse the specified portion of the string representation of a URL,
     * assuming that it has a format similar to that for <code>http</code>.
     * On entry the component fields hold the values copied from the context
     * URL (if one is being used); on exit they describe the resolved, but
     * not yet normalized, URL.
     *
     * <p><strong>FIXME</strong> - This algorithm can undoubtedly be optimized
     * for performance.  However, that needs to wait until after sufficient
     * unit tests are implemented to guarantee correct behavior with no
     * regressions.</p>
     *
     * @param spec String representation being parsed
     * @param start Starting offset, which will be just after the ':' (if
     *  there is one) that determined the protocol name
     * @param limit Ending position, which will be the position of the '#'
     *  (if there is one) that delimited the anchor
     *
     * @exception MalformedURLException if a parsing error occurs
     */
    private void parse(String spec, int start, int limit)
        throws MalformedURLException {

        // Trim the query string (if any) off the tail end.  The query
        // begins at the FIRST '?' of the region being parsed.
        int question = spec.indexOf('?', start);
        if ((question >= 0) && (question < limit)) {
            query = spec.substring(question + 1, limit);
            limit = question;
        } else {
            query = null;
        }

        // Parse the authority section
        boolean hasAuthority = spec.startsWith("//", start);
        if (hasAuthority) {
            int pathStart = spec.indexOf('/', start + 2);
            if ((pathStart >= 0) && (pathStart < limit)) {
                authority = spec.substring(start + 2, pathStart);
                start = pathStart;
            } else {
                authority = spec.substring(start + 2, limit);
                start = limit;
            }
            if (authority.length() > 0) {
                int at = authority.indexOf('@');
                // Always overwrite, so user info copied from the context
                // does not survive a new authority that has none
                userInfo = (at >= 0) ? authority.substring(0, at) : null;
                int colon = authority.indexOf(':', at + 1);
                if (colon >= 0) {
                    String portText = authority.substring(colon + 1);
                    if (portText.length() > 0) {
                        try {
                            port = Integer.parseInt(portText);
                        } catch (NumberFormatException e) {
                            MalformedURLException mue =
                                new MalformedURLException(e.toString());
                            mue.initCause(e);
                            throw mue;
                        }
                    } else {
                        // "host:" - RFC 2396 allows an empty port
                        port = -1;
                    }
                    host = authority.substring(at + 1, colon);
                } else {
                    host = authority.substring(at + 1);
                    port = -1;
                }
            }
        }

        // Parse the path section
        if ((start < limit) && (spec.charAt(start) == '/')) { // Absolute path
            path = spec.substring(start, limit);
            file = buildFile(path, query);
            return;
        }

        // The spec named an authority but no path: the path is empty and
        // must not be inherited from the context
        if (hasAuthority) {
            path = null;
            file = (query != null) ? "?" + query : null;
            return;
        }

        // No base path to resolve against
        if (path == null) {
            file = (query != null) ? "?" + query : null;
            return;
        }

        String relative = spec.substring(start, limit);

        // An empty reference (possibly carrying only a "#fragment") refers
        // to the current document: keep the base path and restore the base
        // query from the base file (RFC 2396, section 5.2, step 2)
        if ((relative.length() == 0) && (query == null)) {
            int baseQuestion = (file == null) ? -1 : file.indexOf('?');
            if (baseQuestion >= 0) {
                query = file.substring(baseQuestion + 1);
            }
            return;
        }

        // Resolve relative path against our context's path.  A base with
        // an authority and an empty path is treated as the root "/".
        if ((path.length() == 0) && (authority != null)) {
            path = "/";
        }
        if (!path.startsWith("/")) {
            throw new MalformedURLException(
                "Base path does not start with '/'");
        }
        // Appending "/../" removes the last base segment once the path is
        // normalized
        if (!path.endsWith("/")) {
            path += "/../";
        }
        path += relative;
        file = buildFile(path, query);

    }

}
|