001: /*
002: * @(#)URL.java 1.106 05/03/12
003: *
004: * Copyright 1990-2007 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package com.sun.perseus.platform;
029:
030: import java.io.IOException;
031: import java.io.InputStream;
032: import java.io.OutputStream;
033: import java.util.Hashtable;
034:
035: /**
036: * Class <code>URL</code> represents a Uniform Resource
037: * Locator, a pointer to a "resource" on the World
038: * Wide Web. A resource can be something as simple as a file or a
039: * directory, or it can be a reference to a more complicated object,
040: * such as a query to a database or to a search engine. More
041: * information on the types of URLs and their formats can be found at:
042: * <blockquote>
043: * <a href="http://archive.ncsa.uiuc.edu/SDG/Software/Mosaic/Demo/url-primer.html">
044: * <i>http://archive.ncsa.uiuc.edu/SDG/Software/Mosaic/Demo/url-primer.html</i></a>
045: * </blockquote>
046: * <p>
047: * In general, a URL can be broken into several parts. The previous
048: * example of a URL indicates that the protocol to use is
049: * <code>http</code> (HyperText Transfer Protocol) and that the
050: * information resides on a host machine named
* <code>archive.ncsa.uiuc.edu</code>. The information on that host
052: * machine is named <code>/SDG/Software/Mosaic/Demo/url-primer.html</code>. The exact
053: * meaning of this name on the host machine is both protocol
054: * dependent and host dependent. The information normally resides in
055: * a file, but it could be generated on the fly. This component of
056: * the URL is called the <i>path</i> component.
057: * <p>
058: * A URL can optionally specify a "port", which is the
059: * port number to which the TCP connection is made on the remote host
060: * machine. If the port is not specified, the default port for
061: * the protocol is used instead. For example, the default port for
062: * <code>http</code> is <code>80</code>. An alternative port could be
063: * specified as:
064: * <blockquote><pre>
065: * http://archive.ncsa.uiuc.edu:80/SDG/Software/Mosaic/Demo/url-primer.html
066: * </pre></blockquote>
067: * <p>
068: * The syntax of <code>URL</code> is defined by <a
* href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
070: * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
071: * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
072: * Literal IPv6 Addresses in URLs</i></a>.
073: * <p>
074: * A URL may have appended to it a "fragment", also known
075: * as a "ref" or a "reference". The fragment is indicated by the sharp
076: * sign character "#" followed by more characters. For example,
077: * <blockquote><pre>
078: * http://java.sun.com/index.html#chapter1
079: * </pre></blockquote>
080: * <p>
081: * This fragment is not technically part of the URL. Rather, it
082: * indicates that after the specified resource is retrieved, the
083: * application is specifically interested in that part of the
084: * document that has the tag <code>chapter1</code> attached to it. The
085: * meaning of a tag is resource specific.
086: * <p>
087: * An application can also specify a "relative URL",
088: * which contains only enough information to reach the resource
089: * relative to another URL. Relative URLs are frequently used within
090: * HTML pages. For example, if the contents of the URL:
091: * <blockquote><pre>
092: * http://java.sun.com/index.html
093: * </pre></blockquote>
094: * contained within it the relative URL:
095: * <blockquote><pre>
096: * FAQ.html
097: * </pre></blockquote>
098: * it would be a shorthand for:
099: * <blockquote><pre>
100: * http://java.sun.com/FAQ.html
101: * </pre></blockquote>
102: * <p>
103: * The relative URL need not specify all the components of a URL. If
104: * the protocol, host name, or port number is missing, the value is
105: * inherited from the fully specified URL. The file component must be
106: * specified. The optional fragment is not inherited.
107: *
108: */
109: public final class PURL {
110:
111: static final long serialVersionUID = -7627629688361524110L;
112:
113: /**
114: * The property which specifies the package prefix list to be scanned
115: * for protocol handlers. The value of this property (if any) should
116: * be a vertical bar delimited list of package names to search through
117: * for a protocol handler to load. The policy of this class is that
118: * all protocol handlers will be in a class called <protocolname>.Handler,
119: * and each package in the list is examined in turn for a matching
120: * handler. If none are found (or the property is not specified), the
121: * default package prefix, sun.net.www.protocol, is used. The search
122: * proceeds from the first package in the list to the last and stops
123: * when a match is found.
124: */
125: private static final String protocolPathProp = "java.protocol.handler.pkgs";
126:
127: /**
128: * The protocol to use (ftp, http, nntp, ... etc.) .
129: * @serial
130: */
131: private String protocol;
132:
133: /**
134: * The host name to connect to.
135: * @serial
136: */
137: private String host;
138:
139: /**
140: * The protocol port to connect to.
141: * @serial
142: */
143: private int port = -1;
144:
145: /**
146: * The specified file name on that host. <code>file</code> is
147: * defined as <code>path[?query]</code>
148: * @serial
149: */
150: private String file;
151:
152: /**
153: * The query part of this URL.
154: */
155: private transient String query;
156:
157: /**
158: * The authority part of this URL.
159: * @serial
160: */
161: private String authority;
162:
163: /**
164: * The path part of this URL.
165: */
166: private transient String path;
167:
168: /**
169: * The userinfo part of this URL.
170: */
171: private transient String userInfo;
172:
173: /**
174: * # reference.
175: * @serial
176: */
177: private String ref;
178:
179: /**
180: * The host's IP address, used in equals and hashCode.
181: * Computed on demand. An uninitialized or unknown hostAddress is null.
182: */
183: transient Object hostAddress;
184:
185: /* Our hash code.
186: * @serial
187: */
188: private int hashCode = -1;
189:
190: /**
191: * Creates a <code>URL</code> object from the <code>String</code>
192: * representation.
193: * <p>
194: * This constructor is equivalent to a call to the two-argument
195: * constructor with a <code>null</code> first argument.
196: *
197: * @param spec the <code>String</code> to parse as a URL.
198: * @exception Error If the string specifies an
199: * unknown protocol.
200: * @see java.net.URL#URL(java.net.URL, java.lang.String)
201: */
202: public PURL(String spec) throws Error {
203: this (null, spec);
204: }
205:
206: /**
207: * Creates a URL by parsing the given spec within a specified context.
208: *
209: * The new URL is created from the given context URL and the spec
210: * argument as described in
211: * RFC2396 "Uniform Resource Identifiers : Generic * Syntax" :
212: * <blockquote><pre>
213: * <scheme>://<authority><path>?<query>#<fragment>
214: * </pre></blockquote>
215: * The reference is parsed into the scheme, authority, path, query and
216: * fragment parts. If the path component is empty and the scheme,
217: * authority, and query components are undefined, then the new URL is a
218: * reference to the current document. Otherwise, the fragment and query
219: * parts present in the spec are used in the new URL.
220: * <p>
221: * If the scheme component is defined in the given spec and does not match
222: * the scheme of the context, then the new URL is created as an absolute
223: * URL based on the spec alone. Otherwise the scheme component is inherited
224: * from the context URL.
225: * <p>
226: * If the authority component is present in the spec then the spec is
227: * treated as absolute and the spec authority and path will replace the
228: * context authority and path. If the authority component is absent in the
229: * spec then the authority of the new URL will be inherited from the
230: * context.
231: * <p>
232: * If the spec's path component begins with a slash character
233: * "/" then the
234: * path is treated as absolute and the spec path replaces the context path.
235: * <p>
236: * Otherwise, the path is treated as a relative path and is appended to the
237: * context path, as described in RFC2396. Also, in this case,
238: * the path is canonicalized through the removal of directory
239: * changes made by occurences of ".." and ".".
240: * <p>
241: * For a more detailed description of URL parsing, refer to RFC2396.
242: *
243: * @param context the context in which to parse the specification.
244: * @param spec the <code>String</code> to parse as a URL.
245: * @exception Error if no protocol is specified, or an
246: * unknown protocol is found.
247: * @see java.net.URL#URL(java.lang.String, java.lang.String,
248: * int, java.lang.String)
249: * @see java.net.URLStreamHandler
250: * @see java.net.URLStreamHandler#parseURL(java.net.URL,
251: * java.lang.String, int, int)
252: */
253: public PURL(PURL context, String spec) throws Error {
254: this (context, spec, null);
255: }
256:
257: /**
258: * Creates a URL by parsing the given spec with the specified handler
259: * within a specified context. If the handler is null, the parsing
260: * occurs as with the two argument constructor.
261: *
262: * @param context the context in which to parse the specification.
263: * @param spec the <code>String</code> to parse as a URL.
264: * @param handler the stream handler for the URL.
265: * @exception Error if no protocol is specified, or an
266: * unknown protocol is found.
267: * @exception SecurityException
268: * if a security manager exists and its
269: * <code>checkPermission</code> method doesn't allow
270: * specifying a stream handler.
271: * @see java.net.URL#URL(java.lang.String, java.lang.String,
272: * int, java.lang.String)
273: * @see java.net.URLStreamHandler
274: * @see java.net.URLStreamHandler#parseURL(java.net.URL,
275: * java.lang.String, int, int)
276: */
277: public PURL(PURL context, String spec, Object handler) throws Error {
278: String original = spec;
279: int i, limit, c;
280: int start = 0;
281: String newProtocol = null;
282: boolean aRef = false;
283: boolean isRelative = false;
284:
285: // Check for permission to specify a handler
286: if (handler != null) {
287: // SecurityManager sm = System.getSecurityManager();
288: // if (sm != null) {
289: // checkSpecifyHandler(sm);
290: // }
291: }
292:
293: try {
294: limit = spec.length();
295: while ((limit > 0) && (spec.charAt(limit - 1) <= ' ')) {
296: limit--; //eliminate trailing whitespace
297: }
298: while ((start < limit) && (spec.charAt(start) <= ' ')) {
299: start++; // eliminate leading whitespace
300: }
301:
302: if (spec.regionMatches(true, start, "url:", 0, 4)) {
303: start += 4;
304: }
305: if (start < spec.length() && spec.charAt(start) == '#') {
306: /* we're assuming this is a ref relative to the context URL.
307: * This means protocols cannot start w/ '#', but we must parse
308: * ref URL's like: "hello:there" w/ a ':' in them.
309: */
310: aRef = true;
311: }
312: for (i = start; !aRef && (i < limit)
313: && ((c = spec.charAt(i)) != '/'); i++) {
314: if (c == ':') {
315:
316: String s = spec.substring(start, i).toLowerCase();
317: if (isValidProtocol(s)) {
318: newProtocol = s;
319: start = i + 1;
320: }
321: break;
322: }
323: }
324:
325: // Only use our context if the protocols match.
326: protocol = newProtocol;
327: if ((context != null)
328: && ((newProtocol == null) || newProtocol
329: .equalsIgnoreCase(context.protocol))) {
330:
331: // If the context is a hierarchical URL scheme and the spec
332: // contains a matching scheme then maintain backwards
333: // compatibility and treat it as if the spec didn't contain
334: // the scheme; see 5.2.3 of RFC2396
335: if (context.path != null
336: && context.path.startsWith("/"))
337: newProtocol = null;
338:
339: if (newProtocol == null) {
340: protocol = context.protocol;
341: authority = context.authority;
342: userInfo = context.userInfo;
343: host = context.host;
344: port = context.port;
345: file = context.file;
346: path = context.path;
347: isRelative = true;
348: }
349: }
350:
351: if (protocol == null) {
352: throw new Error("no protocol: " + original);
353: }
354:
355: i = spec.indexOf('#', start);
356: if (i >= 0) {
357: ref = spec.substring(i + 1, limit);
358: limit = i;
359: }
360:
361: /*
362: * Handle special case inheritance of query and fragment
363: * implied by RFC2396 section 5.2.2.
364: */
365: if (isRelative && start == limit) {
366: query = context.query;
367: if (ref == null) {
368: ref = context.ref;
369: }
370: }
371:
372: PURLStreamHandler.parseURL(this , spec, start, limit);
373:
374: } catch (Error e) {
375: throw e;
376: } catch (Exception e) {
377: throw new Error(e.getMessage());
378: }
379: }
380:
381: /*
382: * Returns true if specified string is a valid protocol name.
383: */
384: private boolean isValidProtocol(String protocol) {
385: int len = protocol.length();
386: if (len < 1)
387: return false;
388: char c = protocol.charAt(0);
389: if (!(Character.isLowerCase(c) || Character.isUpperCase(c)))
390: return false;
391: for (int i = 1; i < len; i++) {
392: c = protocol.charAt(i);
393: if (!(Character.isLowerCase(c) || Character.isUpperCase(c) || Character
394: .isDigit(c))
395: && c != '.' && c != '+' && c != '-') {
396: return false;
397: }
398: }
399: return true;
400: }
401:
402: /**
403: * Sets the fields of the URL. This is not a public method so that
404: * only URLStreamHandlers can modify URL fields. URLs are
405: * otherwise constant.
406: *
407: * @param protocol the name of the protocol to use
408: * @param host the name of the host
409: @param port the port number on the host
410: * @param file the file on the host
411: * @param ref the internal reference in the URL
412: */
413: protected void set(String protocol, String host, int port,
414: String file, String ref) {
415: synchronized (this ) {
416: this .protocol = protocol;
417: this .host = host;
418: authority = port == -1 ? host : host + ":" + port;
419: this .port = port;
420: this .file = file;
421: this .ref = ref;
422: /* This is very important. We must recompute this after the
423: * URL has been changed. */
424: hashCode = -1;
425: hostAddress = null;
426: int q = file.lastIndexOf('?');
427: if (q != -1) {
428: query = file.substring(q + 1);
429: path = file.substring(0, q);
430: } else
431: path = file;
432: }
433: }
434:
435: /**
436: * Sets the specified 8 fields of the URL. This is not a public method so
437: * that only URLStreamHandlers can modify URL fields. URLs are otherwise
438: * constant.
439: *
440: * @param protocol the name of the protocol to use
441: * @param host the name of the host
442: * @param port the port number on the host
443: * @param authority the authority part for the url
444: * @param userInfo the username and password
445: * @param path the file on the host
446: * @param ref the internal reference in the URL
447: * @param query the query part of this URL
448: * @since 1.3
449: */
450: protected void set(String protocol, String host, int port,
451: String authority, String userInfo, String path,
452: String query, String ref) {
453: synchronized (this ) {
454: this .protocol = protocol;
455: this .host = host;
456: this .port = port;
457: this .file = query == null ? path : path + "?" + query;
458: this .userInfo = userInfo;
459: this .path = path;
460: this .ref = ref;
461: /* This is very important. We must recompute this after the
462: * URL has been changed. */
463: hashCode = -1;
464: hostAddress = null;
465: this .query = query;
466: this .authority = authority;
467: }
468: }
469:
470: /**
471: * Gets the query part of this <code>URL</code>.
472: *
473: * @return the query part of this <code>URL</code>,
474: * or <CODE>null</CODE> if one does not exist
475: * @since 1.3
476: */
477: public String getQuery() {
478: return query;
479: }
480:
481: /**
482: * Gets the path part of this <code>URL</code>.
483: *
484: * @return the path part of this <code>URL</code>, or an
485: * empty string if one does not exist
486: * @since 1.3
487: */
488: public String getPath() {
489: return path;
490: }
491:
492: /**
493: * Gets the userInfo part of this <code>URL</code>.
494: *
495: * @return the userInfo part of this <code>URL</code>, or
496: * <CODE>null</CODE> if one does not exist
497: */
498: public String getUserInfo() {
499: return userInfo;
500: }
501:
502: /**
503: * Gets the authority part of this <code>URL</code>.
504: *
505: * @return the authority part of this <code>URL</code>
506: * @since 1.3
507: */
508: public String getAuthority() {
509: return authority;
510: }
511:
512: /**
513: * Gets the port number of this <code>URL</code>.
514: *
515: * @return the port number, or -1 if the port is not set
516: */
517: public int getPort() {
518: return port;
519: }
520:
521: /**
522: * Gets the default port number of the protocol associated
523: * with this <code>URL</code>. If the URL scheme or the URLStreamHandler
524: * for the URL do not define a default port number,
525: * then -1 is returned.
526: *
527: * @return the port number
528: */
529: public int getDefaultPort() {
530: return -1;
531: //return handler.getDefaultPort();
532: }
533:
534: /**
535: * Gets the protocol name of this <code>URL</code>.
536: *
537: * @return the protocol of this <code>URL</code>.
538: */
539: public String getProtocol() {
540: return protocol;
541: }
542:
543: /**
544: * Gets the host name of this <code>URL</code>, if applicable.
545: * The format of the host conforms to RFC 2732, i.e. for a
546: * literal IPv6 address, this method will return the IPv6 address
547: * enclosed in square brackets (<tt>'['</tt> and <tt>']'</tt>).
548: *
549: * @return the host name of this <code>URL</code>.
550: */
551: public String getHost() {
552: return host;
553: }
554:
555: /**
556: * Gets the file name of this <code>URL</code>.
557: * The returned file portion will be
558: * the same as <CODE>getPath()</CODE>, plus the concatenation of
559: * the value of <CODE>getQuery()</CODE>, if any. If there is
560: * no query portion, this method and <CODE>getPath()</CODE> will
561: * return identical results.
562: *
563: * @return the file name of this <code>URL</code>,
564: * or an empty string if one does not exist
565: */
566: public String getFile() {
567: return file;
568: }
569:
570: /**
571: * Gets the anchor (also known as the "reference") of this
572: * <code>URL</code>.
573: *
574: * @return the anchor (also known as the "reference") of this
575: * <code>URL</code>, or <CODE>null</CODE> if one does not exist
576: */
577: public String getRef() {
578: return ref;
579: }
580:
581: /**
582: * Constructs a string representation of this <code>URL</code>. The
583: * string is created by calling the <code>toExternalForm</code>
584: * method of the stream protocol handler for this object.
585: *
586: * @return a string representation of this object.
587: * @see java.net.URL#URL(java.lang.String, java.lang.String, int,
588: * java.lang.String)
589: * @see java.net.URLStreamHandler#toExternalForm(java.net.URL)
590: */
591: public String toString() {
592: return toExternalForm();
593: }
594:
595: /**
596: * Constructs a string representation of this <code>URL</code>. The
597: * string is created by calling the <code>toExternalForm</code>
598: * method of the stream protocol handler for this object.
599: *
600: * @return a string representation of this object.
601: * @see java.net.URL#URL(java.lang.String, java.lang.String,
602: * int, java.lang.String)
603: * @see java.net.URLStreamHandler#toExternalForm(java.net.URL)
604: */
605: public String toExternalForm() {
606: return PURLStreamHandler.toExternalForm(this );
607: }
608:
609: }
610:
611: class Parts {
612: String path, query, ref;
613:
614: Parts(String file) {
615: int ind = file.indexOf('#');
616: ref = ind < 0 ? null : file.substring(ind + 1);
617: file = ind < 0 ? file : file.substring(0, ind);
618: int q = file.lastIndexOf('?');
619: if (q != -1) {
620: query = file.substring(q + 1);
621: path = file.substring(0, q);
622: } else {
623: path = file;
624: }
625: }
626:
627: String getPath() {
628: return path;
629: }
630:
631: String getQuery() {
632: return query;
633: }
634:
635: String getRef() {
636: return ref;
637: }
638: }
|