001: /*
002: *
003: *
004: * Copyright 1990-2007 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: */
026:
027: package com.sun.midp.io;
028:
029: import java.io.IOException;
030:
031: /**
032: * A parsed HTTP (or subclass of) URL. Based on RFC 2396.
033: * <p>
034: * Handles IPv6 hosts, check host[0] for a "[".
035: * Can be used for relative URL's that do not have authorities.
036: * Can be used for FTP URL's that do not have the username and passwords.
037: * <p>
038: * Any elements not specified are represented by null, except a
039: * non-specified port, which is represented by a -1.
040: */
public class HttpUrl {
    /** Scheme of the URL or null. */
    public String scheme;
    /** Authority (host [port]) of the URL or null. */
    public String authority;
    /** Path of the URL or null. */
    public String path;
    /** Query of the URL or null. */
    public String query;
    /** Fragment of the URL or null. */
    public String fragment;
    /** Host of the authority or null. */
    public String host;
    /** Port of the authority or -1 for not specified. */
    public int port = -1;
    /** Machine of the host or null. */
    public String machine;
    /** Domain of the host or null. */
    public String domain;

    /**
     * Construct a HttpUrl.
     * <p>
     * A null or empty URL leaves every element unset. A scheme is only
     * recognized when its ":" is the last character of the URL or is
     * directly followed by "//", and only when no "/", "?" or "#" appears
     * before that ":". Otherwise the whole string is parsed as a URL
     * without a scheme.
     *
     * @param url HTTP URL to parse
     *
     * @exception IllegalArgumentException if there is a space, carriage
     * return, line feed or bell character in the URL, the port is not
     * numeric or is outside 1..65535, or the host ends with "." or "-"
     */
    public HttpUrl(String url) {
        int afterScheme = 0;
        int length;
        int endOfScheme;

        if (url == null) {
            return;
        }

        length = url.length();
        if (length == 0) {
            return;
        }

        // A ":" can mark the end of the scheme of an absolute URL.
        endOfScheme = findEndOfScheme(url);
        if (endOfScheme != -1) {
            if (endOfScheme == length - 1) {
                // just a scheme
                scheme = url.substring(0, endOfScheme);
                return;
            }

            if (url.startsWith("//", endOfScheme + 1)) {
                // found "://", get the scheme
                scheme = url.substring(0, endOfScheme);
                afterScheme = endOfScheme + 1;
            }
        }

        parseAfterScheme(url, afterScheme, length);
    }

    /**
     * Construct a HttpUrl from a scheme and partial HTTP URL.
     * <p>
     * The scheme is stored as given. A null or empty partial URL leaves
     * every other element unset.
     *
     * @param theScheme the protocol component of an HTTP URL
     * @param partialUrl HTTP URL to parse
     *
     * @exception IllegalArgumentException if there is a space, carriage
     * return, line feed or bell character in the URL, the port is not
     * numeric or is outside 1..65535, or the host ends with "." or "-"
     */
    public HttpUrl(String theScheme, String partialUrl) {
        scheme = theScheme;

        if (partialUrl == null || partialUrl.length() == 0) {
            return;
        }

        parseAfterScheme(partialUrl, 0, partialUrl.length());
    }

    /**
     * Finds the ":" that could terminate a scheme.
     * <p>
     * A ":" that comes after a "/", "?" or "#" belongs to the path, query
     * or fragment (for example the ":" in "/go?to=http://host/"), so it is
     * not reported as a scheme delimiter.
     *
     * @param url non-null URL to scan
     *
     * @return index of the first ":" if nothing before it is a "/", "?"
     * or "#", otherwise -1
     */
    private static int findEndOfScheme(String url) {
        int colon = url.indexOf(':');

        for (int i = 0; i < colon; i++) {
            char c = url.charAt(i);

            if (c == '/' || c == '?' || c == '#') {
                return -1;
            }
        }

        return colon;
    }

    /**
     * Parse the part of the HTTP URL after the scheme.
     *
     * @param url the complete URL string being parsed
     * @param afterScheme index of the first char after the scheme
     * @param length length of the url
     *
     * @exception IllegalArgumentException if there is a space, carriage
     * return, line feed or bell character in the URL, the port is not
     * numeric or is outside 1..65535, or the host ends with "." or "-"
     */
    private void parseAfterScheme(String url, int afterScheme,
                                  int length) {
        int start;
        int startOfAuthority;
        int endOfAuthority = length;
        int endOfPath = length;
        int endOfQuery = length;

        if (url.indexOf(' ') != -1 || url.indexOf('\r') != -1
                || url.indexOf('\n') != -1
                || url.indexOf('\u0007') != -1) {
            throw new IllegalArgumentException("Space character in URL");
        }

        if (url.startsWith("//", afterScheme)) {
            // do not include the "//"
            startOfAuthority = afterScheme + 2;
        } else {
            // no authority, the path starts right after the scheme and
            // may not begin with a "/"
            startOfAuthority = afterScheme;
        }

        /*
         * All of the elements after the authority are optional and each
         * can contain the delimiter of the element before it, so work
         * backwards: the end of the last element is known, and finding
         * the start of an element gives the end of the one before it.
         */
        start = url.indexOf('#', startOfAuthority);
        if (start != -1) {
            endOfAuthority = start;
            endOfPath = start;
            endOfQuery = start;

            // do not include the "#", do not store an empty fragment
            start++;
            if (start < length) {
                fragment = url.substring(start, length);
            }
        }

        start = url.indexOf('?', startOfAuthority);
        if (start != -1 && start < endOfQuery) {
            endOfAuthority = start;
            endOfPath = start;

            // do not include the "?", do not store an empty query
            start++;
            if (start < endOfQuery) {
                query = url.substring(start, endOfQuery);
            }
        }

        if (startOfAuthority == afterScheme) {
            // no authority, the path starts after the scheme
            start = afterScheme;
        } else {
            // there is an authority, so the path must begin with "/"
            start = url.indexOf('/', startOfAuthority);
        }

        // do not store an empty path
        if (start != -1 && start < endOfPath) {
            endOfAuthority = start;
            path = url.substring(start, endOfPath);
        }

        if (startOfAuthority >= endOfAuthority) {
            // empty or missing authority
            return;
        }

        authority = url.substring(startOfAuthority, endOfAuthority);
        parseAuthority();
    }

    /**
     * Splits the non-null authority field into port, host, machine and
     * domain.
     *
     * @exception IllegalArgumentException if the port is not numeric or
     * is outside 1..65535, or the host ends with "." or "-"
     */
    private void parseAuthority() {
        int endOfPort = authority.length();
        int endOfHost;
        int startOfPort;
        int closeBracket;

        // get the port first, to find the end of the host

        // IPv6 addresses have brackets around them and can contain ":",
        // so only look for the port delimiter after the closing bracket
        closeBracket = authority.indexOf(']');
        if (closeBracket == -1) {
            startOfPort = authority.indexOf(':');
        } else {
            startOfPort = authority.indexOf(':', closeBracket);
        }

        if (startOfPort != -1) {
            endOfHost = startOfPort;

            // do not include the ":"
            startOfPort++;

            // do not try to parse an empty port
            if (startOfPort < endOfPort) {
                port = parsePort(authority.substring(startOfPort,
                                                     endOfPort));
            }
        } else {
            endOfHost = endOfPort;
        }

        // there could be a port but no host
        if (endOfHost < 1) {
            return;
        }

        host = authority.substring(0, endOfHost);

        // the last char of the host must not be a minus sign or period
        char last = host.charAt(host.length() - 1);
        if (last == '.' || last == '-') {
            throw new IllegalArgumentException("invalid host format");
        }

        splitHost();
    }

    /**
     * Converts the textual port of an authority to a number.
     *
     * @param text the characters after the ":" of the authority
     *
     * @return the port, in the range 1..65535
     *
     * @exception IllegalArgumentException if the text is not a number,
     * is negative, is zero or is greater than 65535
     */
    private static int parsePort(String text) {
        int value;

        try {
            value = Integer.parseInt(text);
        } catch (NumberFormatException nfe) {
            throw new IllegalArgumentException("invalid port format");
        }

        if (value < 0) {
            throw new IllegalArgumentException("invalid port format");
        }

        if (value == 0 || value > 0xFFFF) {
            throw new IllegalArgumentException("port out of legal range");
        }

        return value;
    }

    /**
     * Derives machine and domain from the non-empty host field.
     * <p>
     * Nothing is derived when the host starts with "[" (IPv6 literal) or
     * when the character after its last "." is a digit (treated as an
     * IPv4 address).
     */
    private void splitHost() {
        int lastDot;
        int firstDot;

        if (host.charAt(0) == '[') {
            // IP v6 address
            return;
        }

        lastDot = host.lastIndexOf('.');
        if (lastDot != -1 && host.length() > (lastDot + 1)
                && Character.isDigit(host.charAt(lastDot + 1))) {
            // IP v4 address
            return;
        }

        firstDot = host.indexOf('.');
        if (firstDot == -1) {
            machine = host;
            return;
        }

        // do not include the "." in either part
        if (firstDot + 1 < host.length()) {
            domain = host.substring(firstDot + 1, host.length());
        }

        machine = host.substring(0, firstDot);
    }

    /**
     * Adds a base URL to this URL if this URL is a relative one.
     * Afterwards this URL will be an absolute URL.
     *
     * @param baseUrl an absolute URL
     *
     * @exception IllegalArgumentException if there is a space, carriage
     * return, line feed or bell character in the base URL, its port is
     * not numeric or is outside 1..65535, or its host ends with "." or "-"
     * @exception IOException if an I/O error occurs processing the URL
     */
    public void addBaseUrl(String baseUrl) throws IOException {
        addBaseUrl(new HttpUrl(baseUrl));
    }

    /**
     * Adds a base URL to this URL if this URL is a relative one.
     * Afterwards this URL will be an absolute URL.
     * <p>
     * Nothing happens if this URL already has an authority. Otherwise the
     * scheme, the authority and the elements derived from the authority
     * (host, port, machine and domain) are taken from the base URL. If
     * this URL has no path it takes the path of the base URL. If this
     * path does not start with "/" and the base path does, this path is
     * appended to the base path up to and including its last "/".
     *
     * @param baseUrl a parsed absolute URL, must not be null
     */
    public void addBaseUrl(HttpUrl baseUrl) {
        String basePath;

        if (authority != null) {
            return;
        }

        scheme = baseUrl.scheme;
        authority = baseUrl.authority;

        // keep the elements derived from the authority consistent with it
        host = baseUrl.host;
        port = baseUrl.port;
        machine = baseUrl.machine;
        domain = baseUrl.domain;

        if (path == null) {
            path = baseUrl.path;
            return;
        }

        // an absolute path stands on its own, and a base path that is
        // missing or not absolute gives nothing to resolve against
        if (path.charAt(0) == '/' || baseUrl.path == null
                || baseUrl.path.charAt(0) != '/') {
            return;
        }

        // the base path is everything before the last "/" of the base
        basePath = baseUrl.path.substring(0,
                                          baseUrl.path.lastIndexOf('/'));

        path = basePath + '/' + path;
    }

    /**
     * Converts this URL into a string.
     * <p>
     * The "//" is written whenever a scheme or an authority is present,
     * so a URL with a scheme but no authority yields "scheme://" followed
     * by the remaining elements.
     *
     * @return string representation of this URL
     */
    public String toString() {
        StringBuffer url = new StringBuffer();

        if (scheme != null) {
            url.append(scheme);
            url.append(':');
        }

        if (authority != null || scheme != null) {
            url.append("//");
        }

        if (authority != null) {
            url.append(authority);
        }

        if (path != null) {
            url.append(path);
        }

        if (query != null) {
            url.append('?');
            url.append(query);
        }

        if (fragment != null) {
            url.append('#');
            url.append(fragment);
        }

        return url.toString();
    }
}
|