001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.catalina.util;
019:
020: import java.io.Serializable;
021: import java.net.MalformedURLException;
022:
/**
 * <p><strong>URL</strong> is designed to provide public APIs for parsing
 * and synthesizing Uniform Resource Locators as similar as possible to the
 * APIs of <code>java.net.URL</code>, but without the ability to open a
 * stream or connection.  One of the consequences of this is that you can
 * construct URLs for protocols for which a URLStreamHandler is not
 * available (such as an "https" URL when JSSE is not installed).</p>
 *
 * <p><strong>WARNING</strong> - This class assumes that the string
 * representation of a URL conforms to the generic syntax described in
 * RFC 2396 "Uniform Resource Identifiers: Generic Syntax":</p>
 * <pre>
 *   &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;#&lt;fragment&gt;
 * </pre>
 *
 * <p><strong>NOTE</strong> - Instances are mutable: the public
 * {@link #normalize()} method rewrites the path and file, which also
 * changes the result of {@link #equals(Object)} and {@link #hashCode()}.
 * Do not normalize an instance while it is used as a key in a hash-based
 * collection.</p>
 *
 * <p><strong>FIXME</strong> - This class really ought to end up in a Commons
 * package someplace.</p>
 *
 * @author Craig R. McClanahan
 * @version $Revision: 467222 $ $Date: 2006-10-24 05:17:11 +0200 (mar., 24 oct. 2006) $
 */

public final class URL implements Serializable {

    /**
     * Explicit serialization version.  Pinned so that later changes to the
     * set of methods do not silently change the computed default.
     */
    private static final long serialVersionUID = 1L;


    // ----------------------------------------------------- Instance Variables

    /**
     * The authority part of the URL.
     */
    private String authority = null;

    /**
     * The filename part of the URL (path plus optional "?query").
     */
    private String file = null;

    /**
     * The host name part of the URL.
     */
    private String host = null;

    /**
     * The path part of the URL.
     */
    private String path = null;

    /**
     * The port number part of the URL, or -1 if none was specified.
     */
    private int port = -1;

    /**
     * The protocol name part of the URL.
     */
    private String protocol = null;

    /**
     * The query part of the URL.
     */
    private String query = null;

    /**
     * The reference (fragment) part of the URL.
     */
    private String ref = null;

    /**
     * The user info part of the URL.
     */
    private String userInfo = null;


    // ----------------------------------------------------------- Constructors

    /**
     * Create a URL object from the specified String representation.
     *
     * @param spec String representation of the URL
     *
     * @exception MalformedURLException if the string representation
     *  cannot be parsed successfully
     */
    public URL(String spec) throws MalformedURLException {

        this(null, spec);

    }

    /**
     * Create a URL object by parsing a string representation relative
     * to a specified context.  Based on logic from JDK 1.3.1's
     * <code>java.net.URL</code>.
     *
     * @param context URL against which the relative representation
     *  is resolved (may be <code>null</code> for an absolute spec)
     * @param spec String representation of the URL (usually relative)
     *
     * @exception MalformedURLException if the string representation
     *  cannot be parsed successfully; any unexpected runtime failure
     *  during parsing (including a <code>null</code> spec) is reported
     *  this way too, with the original exception attached as the cause
     */
    public URL(URL context, String spec) throws MalformedURLException {

        String original = spec;
        int i, limit, c;
        int start = 0;
        String newProtocol = null;
        boolean aRef = false;

        try {

            // Eliminate leading and trailing whitespace
            limit = spec.length();
            while ((limit > 0) && (spec.charAt(limit - 1) <= ' ')) {
                limit--;
            }
            while ((start < limit) && (spec.charAt(start) <= ' ')) {
                start++;
            }

            // If the string representation starts with "url:", skip it
            if (spec.regionMatches(true, start, "url:", 0, 4)) {
                start += 4;
            }

            // Is this a ref relative to the context URL?
            if ((start < spec.length()) && (spec.charAt(start) == '#')) {
                aRef = true;
            }

            // Parse out the new protocol: everything before the first ':'
            // provided that no '/' is seen first
            for (i = start; !aRef && (i < limit)
                    && ((c = spec.charAt(i)) != '/'); i++) {
                if (c == ':') {
                    // Scheme names are ASCII; lower-case independently of
                    // the default locale (e.g. Turkish dotless i)
                    String s = spec.substring(start, i)
                        .toLowerCase(java.util.Locale.ENGLISH);
                    // Assume all protocols are valid
                    newProtocol = s;
                    start = i + 1;
                    break;
                }
            }

            // Only use our context if the protocols match
            protocol = newProtocol;
            if ((context != null)
                    && ((newProtocol == null) || newProtocol
                            .equalsIgnoreCase(context.getProtocol()))) {
                // If the context is a hierarchical URL scheme and the spec
                // contains a matching scheme then maintain backwards
                // compatibility and treat it as if the spec didn't contain
                // the scheme; see 5.2.3 of RFC2396
                if ((context.getPath() != null)
                        && (context.getPath().startsWith("/"))) {
                    newProtocol = null;
                }
                if (newProtocol == null) {
                    protocol = context.getProtocol();
                    authority = context.getAuthority();
                    userInfo = context.getUserInfo();
                    host = context.getHost();
                    port = context.getPort();
                    file = context.getFile();
                    int question = file.lastIndexOf("?");
                    if (question < 0) {
                        path = file;
                    } else {
                        path = file.substring(0, question);
                    }
                }
            }

            if (protocol == null) {
                throw new MalformedURLException("no protocol: " + original);
            }

            // Parse out any ref portion of the spec
            i = spec.indexOf('#', start);
            if (i >= 0) {
                ref = spec.substring(i + 1, limit);
                limit = i;
            }

            // Parse the remainder of the spec in a protocol-specific fashion
            parse(spec, start, limit);
            if (context != null) {
                normalize();
            }

        } catch (MalformedURLException e) {
            throw e;
        } catch (Exception e) {
            // Keep the historical message but do not lose the root cause
            MalformedURLException wrapped =
                new MalformedURLException(e.toString());
            wrapped.initCause(e);
            throw wrapped;
        }

    }

    /**
     * Create a URL object from the specified components.  The default port
     * number for the specified protocol will be used.
     *
     * @param protocol Name of the protocol to use
     * @param host Name of the host addressed by this protocol
     * @param file Filename on the specified host
     *
     * @exception MalformedURLException is never thrown, but present for
     *  compatible APIs
     */
    public URL(String protocol, String host, String file)
        throws MalformedURLException {

        this(protocol, host, -1, file);

    }

    /**
     * Create a URL object from the specified components.  Specifying a port
     * number of -1 indicates that the URL should use the default port for
     * that protocol.  Based on logic from JDK 1.3.1's
     * <code>java.net.URL</code>.
     * <p>
     * The <code>file</code> argument may carry a query string and a
     * fragment (<code>path?query#ref</code>).  The fragment is split off
     * first, and the query and path are then taken from what remains, so a
     * '?' that appears inside the fragment is not treated as a query
     * delimiter.
     *
     * @param protocol Name of the protocol to use
     * @param host Name of the host addressed by this protocol
     * @param port Port number, or -1 for the default port for this protocol
     * @param file Filename on the specified host (must not be
     *  <code>null</code>)
     *
     * @exception MalformedURLException is never thrown, but present for
     *  compatible APIs
     */
    public URL(String protocol, String host, int port, String file)
        throws MalformedURLException {

        this.protocol = protocol;
        this.host = host;
        this.port = port;

        // Split off the fragment first ...
        int hash = file.indexOf('#');
        this.file = hash < 0 ? file : file.substring(0, hash);
        this.ref = hash < 0 ? null : file.substring(hash + 1);

        // ... then look for the query in the fragment-free remainder only
        int question = this.file.lastIndexOf('?');
        if (question >= 0) {
            query = this.file.substring(question + 1);
            path = this.file.substring(0, question);
        } else {
            path = this.file;
        }

        if ((host != null) && (host.length() > 0)) {
            authority = (port == -1) ? host : host + ":" + port;
        }

    }


    // --------------------------------------------------------- Public Methods

    /**
     * Compare two URLs for equality.  The result is <code>true</code> if and
     * only if the argument is not null, and is a <code>URL</code> object
     * that represents the same <code>URL</code> as this object.  Two
     * <code>URLs</code> are equal if they have the same protocol and
     * reference the same host, the same port number on the host,
     * and the same file and anchor on the host.
     *
     * @param obj The URL to compare against
     */
    public boolean equals(Object obj) {

        // instanceof is false for null, so no separate null check is needed
        if (!(obj instanceof URL)) {
            return false;
        }
        URL other = (URL) obj;
        return sameFile(other) && compare(ref, other.getRef());

    }

    /**
     * Return a hash code computed from exactly the components that
     * {@link #equals(Object)} compares (protocol, host, port, file and
     * reference), so that equal URLs always produce equal hash codes.
     */
    public int hashCode() {

        int result = hash(protocol);
        result = 31 * result + hash(host);
        result = 31 * result + port;
        result = 31 * result + getFile().hashCode();
        result = 31 * result + hash(ref);
        return result;

    }

    /**
     * Return the authority part of the URL.
     */
    public String getAuthority() {

        return this.authority;

    }

    /**
     * Return the filename part of the URL.  <strong>NOTE</strong> - For
     * compatibility with <code>java.net.URL</code>, this value includes
     * the query string if there was one.  For just the path portion,
     * call <code>getPath()</code> instead.  Never returns
     * <code>null</code>; a missing file is reported as an empty string.
     */
    public String getFile() {

        return (this.file == null) ? "" : this.file;

    }

    /**
     * Return the host name part of the URL.
     */
    public String getHost() {

        return this.host;

    }

    /**
     * Return the path part of the URL.  Never returns <code>null</code>;
     * a missing path is reported as an empty string.
     */
    public String getPath() {

        return (this.path == null) ? "" : this.path;

    }

    /**
     * Return the port number part of the URL, or -1 if none was specified.
     */
    public int getPort() {

        return this.port;

    }

    /**
     * Return the protocol name part of the URL.
     */
    public String getProtocol() {

        return this.protocol;

    }

    /**
     * Return the query part of the URL.
     */
    public String getQuery() {

        return this.query;

    }

    /**
     * Return the reference part of the URL.
     */
    public String getRef() {

        return this.ref;

    }

    /**
     * Return the user info part of the URL.
     */
    public String getUserInfo() {

        return this.userInfo;

    }

    /**
     * Normalize the <code>path</code> (and therefore <code>file</code>)
     * portions of this URL: backslashes become slashes, a leading slash is
     * added if missing, and "//", "/./" and "/../" sequences are resolved.
     * <p>
     * <strong>NOTE</strong> - This method is not part of the public API
     * of <code>java.net.URL</code>, but is provided as a value added
     * service of this implementation.
     *
     * @exception MalformedURLException if a normalization error occurs,
     *  such as trying to move above the hierarchical root
     */
    public void normalize() throws MalformedURLException {

        // Special case for null path
        if (path == null) {
            file = (query != null) ? "?" + query : "";
            return;
        }

        // Create a place for the normalized path
        String normalized = path;
        if (normalized.equals("/.")) {
            path = "/";
            file = (query != null) ? path + "?" + query : path;
            return;
        }

        // Normalize the slashes and add leading slash if necessary
        if (normalized.indexOf('\\') >= 0) {
            normalized = normalized.replace('\\', '/');
        }
        if (!normalized.startsWith("/")) {
            normalized = "/" + normalized;
        }

        // Resolve occurrences of "//" in the normalized path
        while (true) {
            int index = normalized.indexOf("//");
            if (index < 0) {
                break;
            }
            normalized = normalized.substring(0, index)
                + normalized.substring(index + 1);
        }

        // Resolve occurrences of "/./" in the normalized path
        while (true) {
            int index = normalized.indexOf("/./");
            if (index < 0) {
                break;
            }
            normalized = normalized.substring(0, index)
                + normalized.substring(index + 2);
        }

        // Resolve occurrences of "/../" in the normalized path
        while (true) {
            int index = normalized.indexOf("/../");
            if (index < 0) {
                break;
            }
            if (index == 0) {
                // Nothing above the root to step back into
                throw new MalformedURLException(
                    "Invalid relative URL reference");
            }
            // Drop the segment that precedes the "/../"
            int index2 = normalized.lastIndexOf('/', index - 1);
            normalized = normalized.substring(0, index2)
                + normalized.substring(index + 3);
        }

        // Resolve occurrences of "/." at the end of the normalized path
        if (normalized.endsWith("/.")) {
            normalized = normalized.substring(0, normalized.length() - 1);
        }

        // Resolve occurrences of "/.." at the end of the normalized path
        if (normalized.endsWith("/..")) {
            int index = normalized.length() - 3;
            int index2 = normalized.lastIndexOf('/', index - 1);
            if (index2 < 0) {
                throw new MalformedURLException(
                    "Invalid relative URL reference");
            }
            normalized = normalized.substring(0, index2 + 1);
        }

        // Store the normalized path that we have completed
        path = normalized;
        file = (query != null) ? path + "?" + query : path;

    }

    /**
     * Compare two URLs, excluding the "ref" fields.  Returns <code>true</code>
     * if this <code>URL</code> and the <code>other</code> argument both refer
     * to the same resource.  The two <code>URLs</code> might not both contain
     * the same anchor.
     *
     * @param other The URL to compare against
     */
    public boolean sameFile(URL other) {

        if (!compare(protocol, other.getProtocol())) {
            return false;
        }
        if (!compare(host, other.getHost())) {
            return false;
        }
        if (port != other.getPort()) {
            return false;
        }
        // Use getFile() on both sides: the raw field may be null while the
        // accessor reports "", which would make a URL unequal to itself
        return compare(getFile(), other.getFile());

    }

    /**
     * Return a string representation of this URL.  This follows the rules
     * in RFC 2396, Section 5.2, Step 7.
     */
    public String toExternalForm() {

        StringBuffer sb = new StringBuffer();
        if (protocol != null) {
            sb.append(protocol);
            sb.append(":");
        }
        if (authority != null) {
            sb.append("//");
            sb.append(authority);
        }
        if (path != null) {
            sb.append(path);
        }
        if (query != null) {
            sb.append('?');
            sb.append(query);
        }
        if (ref != null) {
            sb.append('#');
            sb.append(ref);
        }
        return sb.toString();

    }

    /**
     * Return a diagnostic string representation of this object listing its
     * individual components.  Use {@link #toExternalForm()} to obtain the
     * URL itself.
     */
    public String toString() {

        StringBuffer sb = new StringBuffer("URL[");
        sb.append("authority=");
        sb.append(authority);
        sb.append(", file=");
        sb.append(file);
        sb.append(", host=");
        sb.append(host);
        sb.append(", port=");
        sb.append(port);
        sb.append(", protocol=");
        sb.append(protocol);
        sb.append(", query=");
        sb.append(query);
        sb.append(", ref=");
        sb.append(ref);
        sb.append(", userInfo=");
        sb.append(userInfo);
        sb.append("]");
        return sb.toString();

    }


    // -------------------------------------------------------- Private Methods

    /**
     * Compare two String values for equality, taking appropriate care if one
     * or both of the values are <code>null</code>.
     *
     * @param first First string
     * @param second Second string
     */
    private boolean compare(String first, String second) {

        if (first == null) {
            return second == null;
        }
        // String.equals(null) is false, which covers a null second value
        return first.equals(second);

    }

    /**
     * Return the hash code of the specified string, or zero if it is
     * <code>null</code>.
     *
     * @param value String to hash (may be <code>null</code>)
     */
    private static int hash(String value) {

        return (value == null) ? 0 : value.hashCode();

    }

    /**
     * Parse the specified portion of the string representation of a URL,
     * assuming that it has a format similar to that for <code>http</code>.
     * Sets the query, authority, user info, host, port, path and file
     * fields as a side effect.
     *
     * <p><strong>FIXME</strong> - This algorithm can undoubtedly be optimized
     * for performance.  However, that needs to wait until after sufficient
     * unit tests are implemented to guarantee correct behavior with no
     * regressions.</p>
     *
     * @param spec String representation being parsed
     * @param start Starting offset, which will be just after the ':' (if
     *  there is one) that determined the protocol name
     * @param limit Ending position, which will be the position of the '#'
     *  (if there is one) that delimited the anchor
     *
     * @exception MalformedURLException if a parsing error occurs
     */
    private void parse(String spec, int start, int limit)
        throws MalformedURLException {

        // Trim the query string (if any) off the tail end
        int question = spec.lastIndexOf('?', limit - 1);
        if ((question >= 0) && (question < limit)) {
            query = spec.substring(question + 1, limit);
            limit = question;
        } else {
            query = null;
        }

        // Parse the authority section
        if (spec.startsWith("//", start)) {
            int pathStart = spec.indexOf("/", start + 2);
            if ((pathStart >= 0) && (pathStart < limit)) {
                authority = spec.substring(start + 2, pathStart);
                start = pathStart;
            } else {
                authority = spec.substring(start + 2, limit);
                start = limit;
            }
            if (authority.length() > 0) {
                // Authority has the form [userinfo@]host[:port]
                int at = authority.indexOf('@');
                if (at >= 0) {
                    userInfo = authority.substring(0, at);
                }
                int colon = authority.indexOf(':', at + 1);
                if (colon >= 0) {
                    try {
                        port = Integer.parseInt(
                            authority.substring(colon + 1));
                    } catch (NumberFormatException e) {
                        throw new MalformedURLException(e.toString());
                    }
                    host = authority.substring(at + 1, colon);
                } else {
                    host = authority.substring(at + 1);
                    port = -1;
                }
            }
        }

        // Parse the path section
        if (spec.startsWith("/", start)) { // Absolute path
            path = spec.substring(start, limit);
            file = (query != null) ? path + "?" + query : path;
            return;
        }

        // Resolve relative path against our context's file
        if (path == null) {
            file = (query != null) ? "?" + query : null;
            return;
        }
        if (!path.startsWith("/")) {
            throw new MalformedURLException(
                "Base path does not start with '/'");
        }
        // Appending "/../" makes the later normalize() call drop the last
        // segment of the base path before the relative part is applied
        if (!path.endsWith("/")) {
            path += "/../";
        }
        path += spec.substring(start, limit);
        file = (query != null) ? path + "?" + query : path;

    }

}
|