001: /*
002: * Copyright 1999-2004 The Apache Software Foundation
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.tomcat.util.net;
018:
019: import java.io.Serializable;
020: import java.net.MalformedURLException;
021:
022: /**
023: * <p><strong>URL</strong> is designed to provide public APIs for parsing
024: * and synthesizing Uniform Resource Locators as similar as possible to the
025: * APIs of <code>java.net.URL</code>, but without the ability to open a
026: * stream or connection. One of the consequences of this is that you can
027: * construct URLs for protocols for which a URLStreamHandler is not
028: * available (such as an "https" URL when JSSE is not installed).</p>
029: *
030: * <p><strong>WARNING</strong> - This class assumes that the string
031: * representation of a URL conforms to the <code>spec</code> argument
032: * as described in RFC 2396 "Uniform Resource Identifiers: Generic Syntax":
033: * <pre>
034: * &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;#&lt;fragment&gt;
035: * </pre></p>
036: *
037: * <p><strong>FIXME</strong> - This class really ought to end up in a Commons
038: * package someplace.</p>
039: *
040: * @author Craig R. McClanahan
041: * @version $Revision: 1.7 $ $Date: 2004/02/24 08:50:05 $
042: */
043:
public final class URL implements Serializable {

    /**
     * Explicit serialization version, so that adding or changing methods
     * does not silently alter the computed default.
     */
    private static final long serialVersionUID = 1L;

    // ----------------------------------------------------------- Constructors

    /**
     * Create a URL object from the specified String representation.
     *
     * @param spec String representation of the URL
     *
     * @exception MalformedURLException if the string representation
     *  cannot be parsed successfully
     */
    public URL(String spec) throws MalformedURLException {

        this(null, spec);

    }

    /**
     * Create a URL object by parsing a string representation relative
     * to a specified context.  Based on logic from JDK 1.3.1's
     * <code>java.net.URL</code>.
     * <p>
     * If <code>spec</code> has no protocol of its own (or repeats the
     * protocol of a hierarchical context), the protocol, authority, user
     * info, host, port and path are inherited from <code>context</code> and
     * the remainder of <code>spec</code> is resolved against them.  A
     * <code>spec</code> whose relative part is empty (for example
     * <code>"#fragment"</code>) refers to the context document itself, so
     * the context's path and query are retained.  Whenever a context is
     * supplied the resulting path is normalized.
     *
     * @param context URL against which the relative representation
     *  is resolved (may be <code>null</code>)
     * @param spec String representation of the URL (usually relative)
     *
     * @exception MalformedURLException if the string representation
     *  cannot be parsed successfully; any unexpected runtime failure
     *  (such as a <code>null</code> spec) is reported this way as well,
     *  with the original exception attached as the cause
     */
    public URL(URL context, String spec) throws MalformedURLException {

        String original = spec;
        int i, limit, c;
        int start = 0;
        String newProtocol = null;
        boolean aRef = false;
        // True once the components of the context have been copied
        boolean inherited = false;

        try {

            // Eliminate leading and trailing whitespace
            limit = spec.length();
            while ((limit > 0) && (spec.charAt(limit - 1) <= ' ')) {
                limit--;
            }
            while ((start < limit) && (spec.charAt(start) <= ' ')) {
                start++;
            }

            // If the string representation starts with "url:", skip it
            if (spec.regionMatches(true, start, "url:", 0, 4)) {
                start += 4;
            }

            // Is this a ref relative to the context URL?
            if ((start < spec.length()) && (spec.charAt(start) == '#')) {
                aRef = true;
            }

            // Parse out the new protocol
            for (i = start; !aRef && (i < limit)
                    && ((c = spec.charAt(i)) != '/'); i++) {
                if (c == ':') {
                    String s = spec.substring(start, i).toLowerCase();
                    // Assume all protocols are valid
                    newProtocol = s;
                    start = i + 1;
                    break;
                } else if (c == '#') {
                    aRef = true;
                } else if (c == '?') {
                    break;
                }
            }

            // Only use our context if the protocols match
            protocol = newProtocol;
            if ((context != null)
                    && ((newProtocol == null) || newProtocol
                            .equalsIgnoreCase(context.getProtocol()))) {
                // If the context is a hierarchical URL scheme and the spec
                // contains a matching scheme then maintain backwards
                // compatibility and treat it as if the spec didn't contain
                // the scheme; see 5.2.3 of RFC2396
                if ((context.getPath() != null)
                        && (context.getPath().startsWith("/"))) {
                    newProtocol = null;
                }
                if (newProtocol == null) {
                    protocol = context.getProtocol();
                    authority = context.getAuthority();
                    userInfo = context.getUserInfo();
                    host = context.getHost();
                    port = context.getPort();
                    file = context.getFile();
                    int question = file.lastIndexOf("?");
                    if (question < 0) {
                        path = file;
                    } else {
                        path = file.substring(0, question);
                    }
                    inherited = true;
                }
            }

            if (protocol == null) {
                throw new MalformedURLException("no protocol: "
                        + original);
            }

            // Parse out any ref portion of the spec
            i = spec.indexOf('#', start);
            if (i >= 0) {
                ref = spec.substring(i + 1, limit);
                limit = i;
            }

            if (inherited && (start >= limit)) {
                // Nothing but (at most) a fragment was specified: this is a
                // reference to the context document itself (RFC 2396,
                // section 5.2 step 2), so keep its path and query instead
                // of resolving an empty relative path against it.
                query = context.getQuery();
            } else {
                // Parse the remainder of the spec in a protocol-specific
                // fashion
                parse(spec, start, limit);
            }
            if (context != null) {
                normalize();
            }

        } catch (MalformedURLException e) {
            throw e;
        } catch (Exception e) {
            // Keep the original failure reachable for diagnostics
            MalformedURLException wrapped =
                new MalformedURLException(e.toString());
            wrapped.initCause(e);
            throw wrapped;
        }

    }

    /**
     * Create a URL object from the specified components.  The default port
     * number for the specified protocol will be used.
     *
     * @param protocol Name of the protocol to use
     * @param host Name of the host addressed by this protocol
     * @param file Filename on the specified host
     *
     * @exception MalformedURLException is never thrown, but present for
     *  compatible APIs
     */
    public URL(String protocol, String host, String file)
            throws MalformedURLException {

        this(protocol, host, -1, file);

    }

    /**
     * Create a URL object from the specified components.  Specifying a port
     * number of -1 indicates that the URL should use the default port for
     * that protocol.  Based on logic from JDK 1.3.1's
     * <code>java.net.URL</code>.
     * <p>
     * Any <code>#fragment</code> at the end of <code>file</code> becomes the
     * reference; the query and path are then split out of what remains, so
     * the fragment never ends up inside the query or the path.
     *
     * @param protocol Name of the protocol to use
     * @param host Name of the host addressed by this protocol
     * @param port Port number, or -1 for the default port for this protocol
     * @param file Filename on the specified host, optionally followed by a
     *  query string and/or a fragment (a <code>null</code> value leaves
     *  the file, path, query and reference unset)
     *
     * @exception MalformedURLException is never thrown, but present for
     *  compatible APIs
     */
    public URL(String protocol, String host, int port, String file)
            throws MalformedURLException {

        this.protocol = protocol;
        this.host = host;
        this.port = port;

        if (file != null) {
            // Split off the fragment first ...
            String withoutRef = file;
            int hash = file.indexOf('#');
            if (hash >= 0) {
                this.ref = file.substring(hash + 1);
                withoutRef = file.substring(0, hash);
            }
            this.file = withoutRef;

            // ... then split path and query from what is left
            int question = withoutRef.lastIndexOf('?');
            if (question >= 0) {
                query = withoutRef.substring(question + 1);
                path = withoutRef.substring(0, question);
            } else {
                path = withoutRef;
            }
        }

        if ((host != null) && (host.length() > 0)) {
            authority = (port == -1) ? host : host + ":" + port;
        }

    }

    // ----------------------------------------------------- Instance Variables

    /**
     * The authority part of the URL.
     */
    private String authority = null;

    /**
     * The filename part of the URL (path plus optional query string).
     */
    private String file = null;

    /**
     * The host name part of the URL.
     */
    private String host = null;

    /**
     * The path part of the URL.
     */
    private String path = null;

    /**
     * The port number part of the URL, or -1 if none was specified.
     */
    private int port = -1;

    /**
     * The protocol name part of the URL.
     */
    private String protocol = null;

    /**
     * The query part of the URL.
     */
    private String query = null;

    /**
     * The reference part of the URL.
     */
    private String ref = null;

    /**
     * The user info part of the URL.
     */
    private String userInfo = null;

    // --------------------------------------------------------- Public Methods

    /**
     * Compare two URLs for equality.  The result is <code>true</code> if and
     * only if the argument is not null, and is a <code>URL</code> object
     * that represents the same <code>URL</code> as this object.  Two
     * <code>URLs</code> are equal if they have the same protocol and
     * reference the same host, the same port number on the host,
     * and the same file and anchor on the host.
     *
     * @param obj The URL to compare against
     *
     * @return <code>true</code> if both objects represent the same URL
     */
    public boolean equals(Object obj) {

        if (this == obj) {
            return true;
        }
        if (!(obj instanceof URL)) {
            // Also covers a null argument
            return false;
        }
        URL other = (URL) obj;
        if (!sameFile(other)) {
            return false;
        }
        return compare(ref, other.getRef());

    }

    /**
     * Return a hash code consistent with {@link #equals(Object)}: it is
     * derived from the protocol, host, port, file and reference.
     *
     * @return the hash code of this URL
     */
    public int hashCode() {

        int result = 17;
        result = 31 * result + hash(protocol);
        result = 31 * result + hash(host);
        result = 31 * result + port;
        // getFile() maps a missing file to "", exactly as sameFile() does
        result = 31 * result + hash(getFile());
        result = 31 * result + hash(ref);
        return result;

    }

    /**
     * Return the authority part of the URL.
     */
    public String getAuthority() {

        return this.authority;

    }

    /**
     * Return the filename part of the URL.  <strong>NOTE</strong> - For
     * compatibility with <code>java.net.URL</code>, this value includes
     * the query string if there was one.  For just the path portion,
     * call <code>getPath()</code> instead.
     *
     * @return the file, or a zero-length string if there is none
     */
    public String getFile() {

        return (this.file == null) ? "" : this.file;

    }

    /**
     * Return the host name part of the URL.
     */
    public String getHost() {

        return this.host;

    }

    /**
     * Return the path part of the URL.
     *
     * @return the path, or a zero-length string if there is none
     */
    public String getPath() {

        return (this.path == null) ? "" : this.path;

    }

    /**
     * Return the port number part of the URL.
     *
     * @return the port number, or -1 if none was specified
     */
    public int getPort() {

        return this.port;

    }

    /**
     * Return the protocol name part of the URL.
     */
    public String getProtocol() {

        return this.protocol;

    }

    /**
     * Return the query part of the URL.
     */
    public String getQuery() {

        return this.query;

    }

    /**
     * Return the reference part of the URL.
     */
    public String getRef() {

        return this.ref;

    }

    /**
     * Return the user info part of the URL.
     */
    public String getUserInfo() {

        return this.userInfo;

    }

    /**
     * Normalize the <code>path</code> (and therefore <code>file</code>)
     * portions of this URL: backslashes become slashes, a leading slash is
     * added if missing, and "//", "/./" and "/../" sequences are resolved.
     * <p>
     * <strong>NOTE</strong> - This method is not part of the public API
     * of <code>java.net.URL</code>, but is provided as a value added
     * service of this implementation.
     *
     * @exception MalformedURLException if a normalization error occurs,
     *  such as trying to move above the hierarchical root
     */
    public void normalize() throws MalformedURLException {

        // Special case for null path
        if (path == null) {
            file = (query != null) ? "?" + query : "";
            return;
        }

        // Special case for a path that is just "/."
        String normalized = path;
        if (normalized.equals("/.")) {
            path = "/";
            file = (query != null) ? path + "?" + query : path;
            return;
        }

        // Normalize the slashes and add leading slash if necessary
        if (normalized.indexOf('\\') >= 0) {
            normalized = normalized.replace('\\', '/');
        }
        if (!normalized.startsWith("/")) {
            normalized = "/" + normalized;
        }

        // Resolve occurrences of "//" in the normalized path
        while (true) {
            int index = normalized.indexOf("//");
            if (index < 0) {
                break;
            }
            normalized = normalized.substring(0, index)
                + normalized.substring(index + 1);
        }

        // Resolve occurrences of "/./" in the normalized path
        while (true) {
            int index = normalized.indexOf("/./");
            if (index < 0) {
                break;
            }
            normalized = normalized.substring(0, index)
                + normalized.substring(index + 2);
        }

        // Resolve occurrences of "/../" in the normalized path
        while (true) {
            int index = normalized.indexOf("/../");
            if (index < 0) {
                break;
            }
            if (index == 0) {
                // There is no parent segment left to remove
                throw new MalformedURLException(
                        "Invalid relative URL reference");
            }
            int index2 = normalized.lastIndexOf('/', index - 1);
            normalized = normalized.substring(0, index2)
                + normalized.substring(index + 3);
        }

        // Resolve occurrences of "/." at the end of the normalized path
        if (normalized.endsWith("/.")) {
            normalized = normalized.substring(0, normalized.length() - 1);
        }

        // Resolve occurrences of "/.." at the end of the normalized path
        if (normalized.endsWith("/..")) {
            int index = normalized.length() - 3;
            int index2 = normalized.lastIndexOf('/', index - 1);
            if (index2 < 0) {
                throw new MalformedURLException(
                        "Invalid relative URL reference");
            }
            normalized = normalized.substring(0, index2 + 1);
        }

        // Store the normalized path that we have completed
        path = normalized;
        file = (query != null) ? path + "?" + query : path;

    }

    /**
     * Compare two URLs, excluding the "ref" fields.  Returns <code>true</code>
     * if this <code>URL</code> and the <code>other</code> argument both refer
     * to the same resource.  The two <code>URLs</code> might not both contain
     * the same anchor.
     *
     * @param other The URL to compare against
     *
     * @return <code>true</code> if protocol, host, port and file all match;
     *  <code>false</code> otherwise, including for a <code>null</code>
     *  argument
     */
    public boolean sameFile(URL other) {

        if (other == null) {
            return false;
        }
        if (!compare(protocol, other.getProtocol())) {
            return false;
        }
        if (!compare(host, other.getHost())) {
            return false;
        }
        if (port != other.getPort()) {
            return false;
        }
        // Use getFile() on both sides: it maps a missing file to "", so a
        // URL without a file still compares equal to itself
        return compare(getFile(), other.getFile());

    }

    /**
     * Return a string representation of this URL.  This follow the rules in
     * RFC 2396, Section 5.2, Step 7.
     */
    public String toExternalForm() {

        StringBuffer sb = new StringBuffer();
        if (protocol != null) {
            sb.append(protocol);
            sb.append(":");
        }
        if (authority != null) {
            sb.append("//");
            sb.append(authority);
        }
        if (path != null) {
            sb.append(path);
        }
        if (query != null) {
            sb.append('?');
            sb.append(query);
        }
        if (ref != null) {
            sb.append('#');
            sb.append(ref);
        }
        return sb.toString();

    }

    /**
     * Return a string representation of this object, listing the individual
     * components for diagnostic purposes.  Use
     * <code>toExternalForm()</code> to obtain the URL itself.
     */
    public String toString() {

        StringBuffer sb = new StringBuffer("URL[");
        sb.append("authority=");
        sb.append(authority);
        sb.append(", file=");
        sb.append(file);
        sb.append(", host=");
        sb.append(host);
        sb.append(", port=");
        sb.append(port);
        sb.append(", protocol=");
        sb.append(protocol);
        sb.append(", query=");
        sb.append(query);
        sb.append(", ref=");
        sb.append(ref);
        sb.append(", userInfo=");
        sb.append(userInfo);
        sb.append("]");
        return sb.toString();

    }

    // -------------------------------------------------------- Private Methods

    /**
     * Compare two String values for equality, taking appropriate care if one
     * or both of the values are <code>null</code>.
     *
     * @param first First string
     * @param second Second string
     *
     * @return <code>true</code> if both are <code>null</code> or both hold
     *  the same characters
     */
    private boolean compare(String first, String second) {

        if (first == null) {
            return second == null;
        }
        return first.equals(second);

    }

    /**
     * Null-safe hash of a String component.
     *
     * @param value Component to hash (may be <code>null</code>)
     *
     * @return the String's hash code, or 0 for <code>null</code>
     */
    private static int hash(String value) {

        return (value == null) ? 0 : value.hashCode();

    }

    /**
     * Parse the specified portion of the string representation of a URL,
     * assuming that it has a format similar to that for <code>http</code>.
     * On entry the fields may already hold values inherited from a context
     * URL; a relative path is resolved against the inherited path, while an
     * authority found in <code>spec</code> replaces everything inherited
     * from the context's authority and path.
     *
     * <p><strong>FIXME</strong> - This algorithm can undoubtedly be optimized
     * for performance.  However, that needs to wait until after sufficient
     * unit tests are implemented to guarantee correct behavior with no
     * regressions.</p>
     *
     * @param spec String representation being parsed
     * @param start Starting offset, which will be just after the ':' (if
     *  there is one) that determined the protocol name
     * @param limit Ending position, which will be the position of the '#'
     *  (if there is one) that delimited the anchor
     *
     * @exception MalformedURLException if a parsing error occurs
     */
    private void parse(String spec, int start, int limit)
            throws MalformedURLException {

        // Trim the query string (if any) off the tail end
        int question = spec.lastIndexOf('?', limit - 1);
        if ((question >= 0) && (question < limit)) {
            query = spec.substring(question + 1, limit);
            limit = question;
        } else {
            query = null;
        }

        // Parse the authority section
        if (spec.startsWith("//", start)) {
            // An explicit authority replaces whatever was inherited from
            // the context (RFC 2396, section 5.2 step 4), including the
            // base path, which must not be used for resolution any more
            userInfo = null;
            host = null;
            port = -1;
            path = null;

            int pathStart = spec.indexOf("/", start + 2);
            if ((pathStart >= 0) && (pathStart < limit)) {
                authority = spec.substring(start + 2, pathStart);
                start = pathStart;
            } else {
                authority = spec.substring(start + 2, limit);
                start = limit;
            }
            if (authority.length() > 0) {
                int at = authority.indexOf('@');
                if (at >= 0) {
                    userInfo = authority.substring(0, at);
                }
                int ipv6 = authority.indexOf('[', at + 1);
                int hStart = at + 1;
                if (ipv6 >= 0) {
                    hStart = ipv6;
                    ipv6 = authority.indexOf(']', ipv6);
                    if (ipv6 < 0) {
                        throw new MalformedURLException(
                                "Closing ']' not found in IPV6 address: "
                                + authority);
                    } else {
                        // Look for the port separator only after the
                        // closing bracket, not inside the address itself
                        at = ipv6 - 1;
                    }
                }

                int colon = authority.indexOf(':', at + 1);
                if (colon >= 0) {
                    try {
                        port = Integer.parseInt(authority
                                .substring(colon + 1));
                    } catch (NumberFormatException e) {
                        throw new MalformedURLException(e.toString());
                    }
                    host = authority.substring(hStart, colon);
                } else {
                    host = authority.substring(hStart);
                    port = -1;
                }
            }
        }

        // Parse the path section
        if (spec.startsWith("/", start)) { // Absolute path
            path = spec.substring(start, limit);
            file = (query != null) ? path + "?" + query : path;
            return;
        }

        // No base path to resolve against
        if (path == null) {
            file = (query != null) ? "?" + query : null;
            return;
        }

        // Resolve relative path against our context's file.  A context
        // such as "http://host" has an empty path; under an authority that
        // is equivalent to the root path.
        if ((path.length() == 0) && (authority != null)) {
            path = "/";
        }
        if (!path.startsWith("/")) {
            throw new MalformedURLException(
                    "Base path does not start with '/'");
        }
        if (!path.endsWith("/")) {
            // Drop the last segment of the base path; normalize() resolves
            // the "/../" later
            path += "/../";
        }
        path += spec.substring(start, limit);
        file = (query != null) ? path + "?" + query : path;

    }

}
|