001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.tomcat.util.net;
019:
020: import java.io.Serializable;
021: import java.net.MalformedURLException;
022:
023: /**
024: * <p><strong>URL</strong> is designed to provide public APIs for parsing
025: * and synthesizing Uniform Resource Locators as similar as possible to the
026: * APIs of <code>java.net.URL</code>, but without the ability to open a
027: * stream or connection. One of the consequences of this is that you can
028: * construct URLs for protocols for which a URLStreamHandler is not
029: * available (such as an "https" URL when JSSE is not installed).</p>
030: *
031: * <p><strong>WARNING</strong> - This class assumes that the string
032: * representation of a URL conforms to the <code>spec</code> argument
033: * as described in RFC 2396 "Uniform Resource Identifiers: Generic Syntax":
034: * <pre>
035: * <scheme>//<authority><path>?<query>#<fragment>
036: * </pre></p>
037: *
038: * <p><strong>FIXME</strong> - This class really ought to end up in a Commons
039: * package someplace.</p>
040: *
041: * @author Craig R. McClanahan
042: * @version $Revision: 467222 $ $Date: 2006-10-24 05:17:11 +0200 (mar., 24 oct. 2006) $
043: */
044:
045: public final class URL implements Serializable {
046:
047: // ----------------------------------------------------------- Constructors
048:
049: /**
050: * Create a URL object from the specified String representation.
051: *
052: * @param spec String representation of the URL
053: *
054: * @exception MalformedURLException if the string representation
055: * cannot be parsed successfully
056: */
057: public URL(String spec) throws MalformedURLException {
058:
059: this (null, spec);
060:
061: }
062:
063: /**
064: * Create a URL object by parsing a string representation relative
065: * to a specified context. Based on logic from JDK 1.3.1's
066: * <code>java.net.URL</code>.
067: *
068: * @param context URL against which the relative representation
069: * is resolved
070: * @param spec String representation of the URL (usually relative)
071: *
072: * @exception MalformedURLException if the string representation
073: * cannot be parsed successfully
074: */
075: public URL(URL context, String spec) throws MalformedURLException {
076:
077: String original = spec;
078: int i, limit, c;
079: int start = 0;
080: String newProtocol = null;
081: boolean aRef = false;
082:
083: try {
084:
085: // Eliminate leading and trailing whitespace
086: limit = spec.length();
087: while ((limit > 0) && (spec.charAt(limit - 1) <= ' ')) {
088: limit--;
089: }
090: while ((start < limit) && (spec.charAt(start) <= ' ')) {
091: start++;
092: }
093:
094: // If the string representation starts with "url:", skip it
095: if (spec.regionMatches(true, start, "url:", 0, 4)) {
096: start += 4;
097: }
098:
099: // Is this a ref relative to the context URL?
100: if ((start < spec.length()) && (spec.charAt(start) == '#')) {
101: aRef = true;
102: }
103:
104: // Parse out the new protocol
105: for (i = start; !aRef && (i < limit); i++) {
106: c = spec.charAt(i);
107: if (c == ':') {
108: String s = spec.substring(start, i).toLowerCase();
109: // Assume all protocols are valid
110: newProtocol = s;
111: start = i + 1;
112: break;
113: } else if (c == '#') {
114: aRef = true;
115: } else if (!isSchemeChar((char) c)) {
116: break;
117: }
118: }
119:
120: // Only use our context if the protocols match
121: protocol = newProtocol;
122: if ((context != null)
123: && ((newProtocol == null) || newProtocol
124: .equalsIgnoreCase(context.getProtocol()))) {
125: // If the context is a hierarchical URL scheme and the spec
126: // contains a matching scheme then maintain backwards
127: // compatibility and treat it as if the spec didn't contain
128: // the scheme; see 5.2.3 of RFC2396
129: if ((context.getPath() != null)
130: && (context.getPath().startsWith("/")))
131: newProtocol = null;
132: if (newProtocol == null) {
133: protocol = context.getProtocol();
134: authority = context.getAuthority();
135: userInfo = context.getUserInfo();
136: host = context.getHost();
137: port = context.getPort();
138: file = context.getFile();
139: int question = file.lastIndexOf("?");
140: if (question < 0)
141: path = file;
142: else
143: path = file.substring(0, question);
144: }
145: }
146:
147: if (protocol == null)
148: throw new MalformedURLException("no protocol: "
149: + original);
150:
151: // Parse out any ref portion of the spec
152: i = spec.indexOf('#', start);
153: if (i >= 0) {
154: ref = spec.substring(i + 1, limit);
155: limit = i;
156: }
157:
158: // Parse the remainder of the spec in a protocol-specific fashion
159: parse(spec, start, limit);
160: if (context != null)
161: normalize();
162:
163: } catch (MalformedURLException e) {
164: throw e;
165: } catch (Exception e) {
166: throw new MalformedURLException(e.toString());
167: }
168:
169: }
170:
171: /**
172: * Create a URL object from the specified components. The default port
173: * number for the specified protocol will be used.
174: *
175: * @param protocol Name of the protocol to use
176: * @param host Name of the host addressed by this protocol
177: * @param file Filename on the specified host
178: *
179: * @exception MalformedURLException is never thrown, but present for
180: * compatible APIs
181: */
182: public URL(String protocol, String host, String file)
183: throws MalformedURLException {
184:
185: this (protocol, host, -1, file);
186:
187: }
188:
189: /**
190: * Create a URL object from the specified components. Specifying a port
191: * number of -1 indicates that the URL should use the default port for
192: * that protocol. Based on logic from JDK 1.3.1's
193: * <code>java.net.URL</code>.
194: *
195: * @param protocol Name of the protocol to use
196: * @param host Name of the host addressed by this protocol
197: * @param port Port number, or -1 for the default port for this protocol
198: * @param file Filename on the specified host
199: *
200: * @exception MalformedURLException is never thrown, but present for
201: * compatible APIs
202: */
203: public URL(String protocol, String host, int port, String file)
204: throws MalformedURLException {
205:
206: this .protocol = protocol;
207: this .host = host;
208: this .port = port;
209:
210: int hash = file.indexOf('#');
211: this .file = hash < 0 ? file : file.substring(0, hash);
212: this .ref = hash < 0 ? null : file.substring(hash + 1);
213: int question = file.lastIndexOf('?');
214: if (question >= 0) {
215: query = file.substring(question + 1);
216: path = file.substring(0, question);
217: } else
218: path = file;
219:
220: if ((host != null) && (host.length() > 0))
221: authority = (port == -1) ? host : host + ":" + port;
222:
223: }
224:
225: // ----------------------------------------------------- Instance Variables
226:
227: /**
228: * The authority part of the URL.
229: */
230: private String authority = null;
231:
232: /**
233: * The filename part of the URL.
234: */
235: private String file = null;
236:
237: /**
238: * The host name part of the URL.
239: */
240: private String host = null;
241:
242: /**
243: * The path part of the URL.
244: */
245: private String path = null;
246:
247: /**
248: * The port number part of the URL.
249: */
250: private int port = -1;
251:
252: /**
253: * The protocol name part of the URL.
254: */
255: private String protocol = null;
256:
257: /**
258: * The query part of the URL.
259: */
260: private String query = null;
261:
262: /**
263: * The reference part of the URL.
264: */
265: private String ref = null;
266:
267: /**
268: * The user info part of the URL.
269: */
270: private String userInfo = null;
271:
272: // --------------------------------------------------------- Public Methods
273:
274: /**
275: * Compare two URLs for equality. The result is <code>true</code> if and
276: * only if the argument is not null, and is a <code>URL</code> object
277: * that represents the same <code>URL</code> as this object. Two
278: * <code>URLs</code> are equal if they have the same protocol and
279: * reference the same host, the same port number on the host,
280: * and the same file and anchor on the host.
281: *
282: * @param obj The URL to compare against
283: */
284: public boolean equals(Object obj) {
285:
286: if (obj == null)
287: return (false);
288: if (!(obj instanceof URL))
289: return (false);
290: URL other = (URL) obj;
291: if (!sameFile(other))
292: return (false);
293: return (compare(ref, other.getRef()));
294:
295: }
296:
297: /**
298: * Return the authority part of the URL.
299: */
300: public String getAuthority() {
301:
302: return (this .authority);
303:
304: }
305:
306: /**
307: * Return the filename part of the URL. <strong>NOTE</strong> - For
308: * compatibility with <code>java.net.URL</code>, this value includes
309: * the query string if there was one. For just the path portion,
310: * call <code>getPath()</code> instead.
311: */
312: public String getFile() {
313:
314: if (file == null)
315: return ("");
316: return (this .file);
317:
318: }
319:
320: /**
321: * Return the host name part of the URL.
322: */
323: public String getHost() {
324:
325: return (this .host);
326:
327: }
328:
329: /**
330: * Return the path part of the URL.
331: */
332: public String getPath() {
333:
334: if (this .path == null)
335: return ("");
336: return (this .path);
337:
338: }
339:
340: /**
341: * Return the port number part of the URL.
342: */
343: public int getPort() {
344:
345: return (this .port);
346:
347: }
348:
349: /**
350: * Return the protocol name part of the URL.
351: */
352: public String getProtocol() {
353:
354: return (this .protocol);
355:
356: }
357:
358: /**
359: * Return the query part of the URL.
360: */
361: public String getQuery() {
362:
363: return (this .query);
364:
365: }
366:
367: /**
368: * Return the reference part of the URL.
369: */
370: public String getRef() {
371:
372: return (this .ref);
373:
374: }
375:
376: /**
377: * Return the user info part of the URL.
378: */
379: public String getUserInfo() {
380:
381: return (this .userInfo);
382:
383: }
384:
385: /**
386: * Normalize the <code>path</code> (and therefore <code>file</code>)
387: * portions of this URL.
388: * <p>
389: * <strong>NOTE</strong> - This method is not part of the public API
390: * of <code>java.net.URL</code>, but is provided as a value added
391: * service of this implementation.
392: *
393: * @exception MalformedURLException if a normalization error occurs,
394: * such as trying to move about the hierarchical root
395: */
396: public void normalize() throws MalformedURLException {
397:
398: // Special case for null path
399: if (path == null) {
400: if (query != null)
401: file = "?" + query;
402: else
403: file = "";
404: return;
405: }
406:
407: // Create a place for the normalized path
408: String normalized = path;
409: if (normalized.equals("/.")) {
410: path = "/";
411: if (query != null)
412: file = path + "?" + query;
413: else
414: file = path;
415: return;
416: }
417:
418: // Normalize the slashes and add leading slash if necessary
419: if (normalized.indexOf('\\') >= 0)
420: normalized = normalized.replace('\\', '/');
421: if (!normalized.startsWith("/"))
422: normalized = "/" + normalized;
423:
424: // Resolve occurrences of "//" in the normalized path
425: while (true) {
426: int index = normalized.indexOf("//");
427: if (index < 0)
428: break;
429: normalized = normalized.substring(0, index)
430: + normalized.substring(index + 1);
431: }
432:
433: // Resolve occurrences of "/./" in the normalized path
434: while (true) {
435: int index = normalized.indexOf("/./");
436: if (index < 0)
437: break;
438: normalized = normalized.substring(0, index)
439: + normalized.substring(index + 2);
440: }
441:
442: // Resolve occurrences of "/../" in the normalized path
443: while (true) {
444: int index = normalized.indexOf("/../");
445: if (index < 0)
446: break;
447: if (index == 0)
448: throw new MalformedURLException(
449: "Invalid relative URL reference");
450: int index2 = normalized.lastIndexOf('/', index - 1);
451: normalized = normalized.substring(0, index2)
452: + normalized.substring(index + 3);
453: }
454:
455: // Resolve occurrences of "/." at the end of the normalized path
456: if (normalized.endsWith("/."))
457: normalized = normalized.substring(0,
458: normalized.length() - 1);
459:
460: // Resolve occurrences of "/.." at the end of the normalized path
461: if (normalized.endsWith("/..")) {
462: int index = normalized.length() - 3;
463: int index2 = normalized.lastIndexOf('/', index - 1);
464: if (index2 < 0)
465: throw new MalformedURLException(
466: "Invalid relative URL reference");
467: normalized = normalized.substring(0, index2 + 1);
468: }
469:
470: // Return the normalized path that we have completed
471: path = normalized;
472: if (query != null)
473: file = path + "?" + query;
474: else
475: file = path;
476:
477: }
478:
479: /**
480: * Compare two URLs, excluding the "ref" fields. Returns <code>true</code>
481: * if this <code>URL</code> and the <code>other</code> argument both refer
482: * to the same resource. The two <code>URLs</code> might not both contain
483: * the same anchor.
484: */
485: public boolean sameFile(URL other) {
486:
487: if (!compare(protocol, other.getProtocol()))
488: return (false);
489: if (!compare(host, other.getHost()))
490: return (false);
491: if (port != other.getPort())
492: return (false);
493: if (!compare(file, other.getFile()))
494: return (false);
495: return (true);
496:
497: }
498:
499: /**
500: * Return a string representation of this URL. This follow the rules in
501: * RFC 2396, Section 5.2, Step 7.
502: */
503: public String toExternalForm() {
504:
505: StringBuffer sb = new StringBuffer();
506: if (protocol != null) {
507: sb.append(protocol);
508: sb.append(":");
509: }
510: if (authority != null) {
511: sb.append("//");
512: sb.append(authority);
513: }
514: if (path != null)
515: sb.append(path);
516: if (query != null) {
517: sb.append('?');
518: sb.append(query);
519: }
520: if (ref != null) {
521: sb.append('#');
522: sb.append(ref);
523: }
524: return (sb.toString());
525:
526: }
527:
528: /**
529: * Return a string representation of this object.
530: */
531: public String toString() {
532:
533: StringBuffer sb = new StringBuffer("URL[");
534: sb.append("authority=");
535: sb.append(authority);
536: sb.append(", file=");
537: sb.append(file);
538: sb.append(", host=");
539: sb.append(host);
540: sb.append(", port=");
541: sb.append(port);
542: sb.append(", protocol=");
543: sb.append(protocol);
544: sb.append(", query=");
545: sb.append(query);
546: sb.append(", ref=");
547: sb.append(ref);
548: sb.append(", userInfo=");
549: sb.append(userInfo);
550: sb.append("]");
551: return (sb.toString());
552:
553: // return (toExternalForm());
554:
555: }
556:
557: // -------------------------------------------------------- Private Methods
558:
559: /**
560: * Compare to String values for equality, taking appropriate care if one
561: * or both of the values are <code>null</code>.
562: *
563: * @param first First string
564: * @param second Second string
565: */
566: private boolean compare(String first, String second) {
567:
568: if (first == null) {
569: if (second == null)
570: return (true);
571: else
572: return (false);
573: } else {
574: if (second == null)
575: return (false);
576: else
577: return (first.equals(second));
578: }
579:
580: }
581:
582: /**
583: * Parse the specified portion of the string representation of a URL,
584: * assuming that it has a format similar to that for <code>http</code>.
585: *
586: * <p><strong>FIXME</strong> - This algorithm can undoubtedly be optimized
587: * for performance. However, that needs to wait until after sufficient
588: * unit tests are implemented to guarantee correct behavior with no
589: * regressions.</p>
590: *
591: * @param spec String representation being parsed
592: * @param start Starting offset, which will be just after the ':' (if
593: * there is one) that determined the protocol name
594: * @param limit Ending position, which will be the position of the '#'
595: * (if there is one) that delimited the anchor
596: *
597: * @exception MalformedURLException if a parsing error occurs
598: */
599: private void parse(String spec, int start, int limit)
600: throws MalformedURLException {
601:
602: // Trim the query string (if any) off the tail end
603: int question = spec.lastIndexOf('?', limit - 1);
604: if ((question >= 0) && (question < limit)) {
605: query = spec.substring(question + 1, limit);
606: limit = question;
607: } else {
608: query = null;
609: }
610:
611: // Parse the authority section
612: if (spec.indexOf("//", start) == start) {
613: int pathStart = spec.indexOf("/", start + 2);
614: if ((pathStart >= 0) && (pathStart < limit)) {
615: authority = spec.substring(start + 2, pathStart);
616: start = pathStart;
617: } else {
618: authority = spec.substring(start + 2, limit);
619: start = limit;
620: }
621: if (authority.length() > 0) {
622: int at = authority.indexOf('@');
623: if (at >= 0) {
624: userInfo = authority.substring(0, at);
625: }
626: int ipv6 = authority.indexOf('[', at + 1);
627: int hStart = at + 1;
628: if (ipv6 >= 0) {
629: hStart = ipv6;
630: ipv6 = authority.indexOf(']', ipv6);
631: if (ipv6 < 0) {
632: throw new MalformedURLException(
633: "Closing ']' not found in IPV6 address: "
634: + authority);
635: } else {
636: at = ipv6 - 1;
637: }
638: }
639:
640: int colon = authority.indexOf(':', at + 1);
641: if (colon >= 0) {
642: try {
643: port = Integer.parseInt(authority
644: .substring(colon + 1));
645: } catch (NumberFormatException e) {
646: throw new MalformedURLException(e.toString());
647: }
648: host = authority.substring(hStart, colon);
649: } else {
650: host = authority.substring(hStart);
651: port = -1;
652: }
653: }
654: }
655:
656: // Parse the path section
657: if (spec.indexOf("/", start) == start) { // Absolute path
658: path = spec.substring(start, limit);
659: if (query != null)
660: file = path + "?" + query;
661: else
662: file = path;
663: return;
664: }
665:
666: // Resolve relative path against our context's file
667: if (path == null) {
668: if (query != null)
669: file = "?" + query;
670: else
671: file = null;
672: return;
673: }
674: if (!path.startsWith("/"))
675: throw new MalformedURLException(
676: "Base path does not start with '/'");
677: if (!path.endsWith("/"))
678: path += "/../";
679: path += spec.substring(start, limit);
680: if (query != null)
681: file = path + "?" + query;
682: else
683: file = path;
684: return;
685:
686: }
687:
688: /**
689: * Determine if the character is allowed in the scheme of a URI.
690: * See RFC 2396, Section 3.1
691: */
692: public static boolean isSchemeChar(char c) {
693: return Character.isLetterOrDigit(c) || c == '+' || c == '-'
694: || c == '.';
695: }
696:
697: }
|