0001: /*
0002: * @(#)URI.java 0.3-2 18/06/1999
0003: *
0004: * This file is part of the HTTPClient package
0005: * Copyright (C) 1996-1999 Ronald Tschalär
0006: *
0007: * This library is free software; you can redistribute it and/or
0008: * modify it under the terms of the GNU Lesser General Public
0009: * License as published by the Free Software Foundation; either
0010: * version 2 of the License, or (at your option) any later version.
0011: *
0012: * This library is distributed in the hope that it will be useful,
0013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0015: * Lesser General Public License for more details.
0016: *
0017: * You should have received a copy of the GNU Lesser General Public
0018: * License along with this library; if not, write to the Free
0019: * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
0020: * MA 02111-1307, USA
0021: *
0022: * For questions, suggestions, bug-reports, enhancement-requests etc.
0023: * I may be contacted at:
0024: *
0025: * ronald@innovation.ch
0026: *
0027: */
0028:
0029: package HTTPClient;
0030:
0031: import java.net.URL;
0032: import java.net.MalformedURLException;
0033: import java.util.BitSet;
0034:
0035: /**
0036: * This class represents a generic URI, as defined in RFC-2396.
0037: * This is similar to java.net.URL, with the following enhancements:
0038: * <UL>
0039: * <LI>it doesn't require a URLStreamHandler to exist for the scheme; this
0040: * allows this class to be used to hold any URI, construct absolute
0041: * URIs from relative ones, etc.
0042: * <LI>it handles escapes correctly
0043: * <LI>equals() works correctly
0044: * <LI>relative URIs are correctly constructed
0045: * <LI>it has methods for accessing various fields such as userinfo,
0046: * fragment, params, etc.
0047: * <LI>it handles less common forms of resources such as the "*" used in
0048: * http URLs.
0049: * </UL>
0050: *
0051: * <P>Ideally, java.net.URL should subclass URI.
0052: *
0053: * @see <A HREF="http://www.ics.uci.edu/pub/ietf/uri/rfc2396.txt">rfc-2396</A>
0054: * @version 0.3-2 18/06/1999
0055: * @author Ronald Tschalär
0056: * @since V0.3-1
0057: */
0058:
0059: public class URI {
0060: /* various character classes as defined in the draft */
0061: protected static BitSet alphanumChar;
0062: protected static BitSet markChar;
0063: protected static BitSet reservedChar;
0064: protected static BitSet unreservedChar;
0065: protected static BitSet uricChar;
0066: protected static BitSet pcharChar;
0067: protected static BitSet userinfoChar;
0068: protected static BitSet schemeChar;
0069: protected static BitSet reg_nameChar;
0070:
0071: static {
0072: alphanumChar = new BitSet(128);
0073: for (int ch = '0'; ch <= '9'; ch++)
0074: alphanumChar.set(ch);
0075: for (int ch = 'A'; ch <= 'Z'; ch++)
0076: alphanumChar.set(ch);
0077: for (int ch = 'a'; ch <= 'z'; ch++)
0078: alphanumChar.set(ch);
0079:
0080: markChar = new BitSet(128);
0081: markChar.set('-');
0082: markChar.set('_');
0083: markChar.set('.');
0084: markChar.set('!');
0085: markChar.set('~');
0086: markChar.set('*');
0087: markChar.set('\'');
0088: markChar.set('(');
0089: markChar.set(')');
0090:
0091: reservedChar = new BitSet(128);
0092: reservedChar.set(';');
0093: reservedChar.set('/');
0094: reservedChar.set('?');
0095: reservedChar.set(':');
0096: reservedChar.set('@');
0097: reservedChar.set('&');
0098: reservedChar.set('=');
0099: reservedChar.set('+');
0100: reservedChar.set('$');
0101: reservedChar.set(',');
0102:
0103: unreservedChar = new BitSet(128);
0104: unreservedChar.or(alphanumChar);
0105: unreservedChar.or(markChar);
0106:
0107: uricChar = new BitSet(128);
0108: uricChar.or(unreservedChar);
0109: uricChar.or(reservedChar);
0110:
0111: pcharChar = new BitSet(128);
0112: pcharChar.or(unreservedChar);
0113: pcharChar.set(':');
0114: pcharChar.set('@');
0115: pcharChar.set('&');
0116: pcharChar.set('=');
0117: pcharChar.set('+');
0118: pcharChar.set('$');
0119: pcharChar.set(',');
0120:
0121: userinfoChar = new BitSet(128);
0122: userinfoChar.or(unreservedChar);
0123: userinfoChar.set(';');
0124: userinfoChar.set(':');
0125: userinfoChar.set('&');
0126: userinfoChar.set('=');
0127: userinfoChar.set('+');
0128: userinfoChar.set('$');
0129: userinfoChar.set(',');
0130:
0131: // this actually shouldn't contain uppercase letters...
0132: schemeChar = new BitSet(128);
0133: schemeChar.or(alphanumChar);
0134: schemeChar.set('+');
0135: schemeChar.set('-');
0136: schemeChar.set('.');
0137:
0138: reg_nameChar = new BitSet(128);
0139: reg_nameChar.or(unreservedChar);
0140: reg_nameChar.set('$');
0141: reg_nameChar.set(',');
0142: reg_nameChar.set(';');
0143: reg_nameChar.set(':');
0144: reg_nameChar.set('@');
0145: reg_nameChar.set('&');
0146: reg_nameChar.set('=');
0147: reg_nameChar.set('+');
0148: }
0149:
0150: /* our uri in pieces */
0151:
0152: protected boolean is_generic;
0153: protected String scheme;
0154: protected String opaque;
0155: protected String userinfo;
0156: protected String host;
0157: protected int port = -1;
0158: protected String path;
0159: protected String query;
0160: protected String fragment;
0161:
0162: /* cache the java.net.URL */
0163:
0164: protected URL url = null;
0165:
0166: // Constructors
0167:
/**
 * Creates a URI by parsing the string form of an absolute URI. This is
 * the same as parsing <var>uri</var> without any base URI.
 *
 * @param uri the absolute URI in string form
 * @exception ParseException if <var>uri</var> contains no scheme, or if
 *                           it contains a port which is not a number
 */
public URI(String uri) throws ParseException {
    this((URI) null, uri);
}
0179:
0180: /**
0181: * Constructs a URI from the given string representation, relative to
0182: * the given base URI.
0183: *
0184: * @param base the base URI, relative to which <var>rel_uri</var>
0185: * is to be parsed
0186: * @param rel_uri a String containing a relative or absolute URI
0187: * @exception ParseException if <var>base</var> is null and
0188: * <var>rel_uri</var> is not an absolute URI, or
0189: * if <var>base</var> is not null and the scheme
0190: * is not known to use the generic syntax, or
0191: * if a given port cannot be parsed as a number
0192: */
0193: public URI(URI base, String rel_uri) throws ParseException {
0194: /* Parsing is done according to the following RE:
0195: *
0196: * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
0197: * 12 3 4 5 6 7 8 9
0198: *
0199: * 2: scheme
0200: * 4: authority
0201: * 5: path
0202: * 7: query
0203: * 9: fragment
0204: */
0205:
0206: char[] uri = rel_uri.toCharArray();
0207: int pos = 0, idx, len = uri.length;
0208:
0209: // trim()
0210:
0211: while (pos < len && Character.isSpace(uri[pos]))
0212: pos++;
0213: while (len > 0 && Character.isSpace(uri[len - 1]))
0214: len--;
0215:
0216: // strip the special "url" or "uri" scheme
0217:
0218: if (pos < len - 3
0219: && uri[pos + 3] == ':'
0220: && (uri[pos + 0] == 'u' || uri[pos + 0] == 'U')
0221: && (uri[pos + 1] == 'r' || uri[pos + 1] == 'R')
0222: && (uri[pos + 2] == 'i' || uri[pos + 2] == 'I'
0223: || uri[pos + 2] == 'l' || uri[pos + 2] == 'L'))
0224: pos += 4;
0225:
0226: // get scheme: (([^:/?#]+):)?
0227:
0228: idx = pos;
0229: while (idx < len && uri[idx] != ':' && uri[idx] != '/'
0230: && uri[idx] != '?' && uri[idx] != '#')
0231: idx++;
0232: if (idx < len && uri[idx] == ':') {
0233: scheme = rel_uri.substring(pos, idx).trim().toLowerCase();
0234: pos = idx + 1;
0235: }
0236:
0237: // check and resolve scheme
0238:
0239: String final_scheme = scheme;
0240: if (scheme == null) {
0241: if (base == null)
0242: throw new ParseException("No scheme found");
0243: final_scheme = base.scheme;
0244: }
0245:
0246: // check for generic vs. opaque
0247:
0248: is_generic = usesGenericSyntax(final_scheme);
0249: if (!is_generic) {
0250: if (base != null && scheme == null)
0251: throw new ParseException(
0252: "Can't resolve relative URI for " + "scheme "
0253: + final_scheme);
0254:
0255: opaque = rel_uri.substring(pos);
0256: return;
0257: }
0258:
0259: // get authority: (//([^/?#]*))?
0260:
0261: if (pos < len - 1 && uri[pos] == '/' && uri[pos + 1] == '/') {
0262: pos += 2;
0263: idx = pos;
0264: while (idx < len && uri[idx] != '/' && uri[idx] != '?'
0265: && uri[idx] != '#')
0266: idx++;
0267:
0268: parse_authority(rel_uri.substring(pos, idx), final_scheme);
0269: pos = idx;
0270: }
0271:
0272: // get path: ([^?#]*)
0273:
0274: idx = pos;
0275: while (idx < len && uri[idx] != '?' && uri[idx] != '#')
0276: idx++;
0277: this .path = rel_uri.substring(pos, idx);
0278: pos = idx;
0279:
0280: // get query: (\?([^#]*))?
0281:
0282: if (pos < len && uri[pos] == '?') {
0283: pos += 1;
0284: idx = pos;
0285: while (idx < len && uri[idx] != '#')
0286: idx++;
0287: this .query = unescape(rel_uri.substring(pos, idx));
0288: pos = idx;
0289: }
0290:
0291: // get fragment: (#(.*))?
0292:
0293: if (pos < len && uri[pos] == '#')
0294: this .fragment = unescape(rel_uri.substring(pos + 1, len));
0295:
0296: // now resolve the parts relative to the base
0297:
0298: if (base != null) {
0299: if (scheme != null)
0300: return; // resolve scheme
0301: scheme = base.scheme;
0302:
0303: if (host != null)
0304: return; // resolve authority
0305: userinfo = base.userinfo;
0306: host = base.host;
0307: port = base.port;
0308:
0309: if (path.length() == 0 && query == null) // current doc
0310: {
0311: path = base.path;
0312: query = base.query;
0313: return;
0314: }
0315:
0316: if (path.length() == 0 || path.charAt(0) != '/') // relative uri
0317: {
0318: idx = base.path.lastIndexOf('/');
0319: if (idx == -1)
0320: return; // weird one
0321: path = base.path.substring(0, idx + 1) + path;
0322:
0323: len = path.length();
0324: if (!((idx = path.indexOf("/.")) != -1 && (idx == len - 2
0325: || path.charAt(idx + 2) == '/' || (path
0326: .charAt(idx + 2) == '.' && (idx == len - 3 || path
0327: .charAt(idx + 3) == '/')))))
0328: return;
0329:
0330: char[] p = new char[path.length()]; // clean path
0331: path.getChars(0, p.length, p, 0);
0332:
0333: int beg = 0;
0334: for (idx = 1; idx < len; idx++) {
0335: if (p[idx] == '.' && p[idx - 1] == '/') {
0336: int end;
0337: if (idx == len - 1) // trailing "/."
0338: {
0339: end = idx;
0340: idx += 1;
0341: } else if (p[idx + 1] == '/') // "/./"
0342: {
0343: end = idx - 1;
0344: idx += 1;
0345: } else if (p[idx + 1] == '.'
0346: && (idx == len - 2 || p[idx + 2] == '/')) // "/../"
0347: {
0348: if (idx < beg + 2) // keep from backing up too much
0349: {
0350: beg = idx + 2;
0351: continue;
0352: }
0353:
0354: end = idx - 2;
0355: while (end > beg && p[end] != '/')
0356: end--;
0357: if (p[end] != '/')
0358: continue;
0359: if (idx == len - 2)
0360: end++;
0361: idx += 2;
0362: } else
0363: continue;
0364: System.arraycopy(p, idx, p, end, len - idx);
0365: len -= idx - end;
0366: idx = end;
0367: }
0368: }
0369: path = new String(p, 0, len);
0370: }
0371: }
0372: }
0373:
0374: /**
0375: * Parse the authority specific part
0376: */
0377: private void parse_authority(String authority, String scheme)
0378: throws ParseException {
0379: /* The authority is further parsed according to:
0380: *
0381: * ^(([^@]*)@?)([^:]*)?(:(.*))?
0382: * 12 3 4 5
0383: *
0384: * 2: userinfo
0385: * 3: host
0386: * 5: port
0387: */
0388:
0389: char[] uri = authority.toCharArray();
0390: int pos = 0, idx, len = uri.length;
0391:
0392: // get userinfo: (([^@]*)@?)
0393:
0394: idx = pos;
0395: while (idx < len && uri[idx] != '@')
0396: idx++;
0397: if (idx < len && uri[idx] == '@') {
0398: this .userinfo = unescape(authority.substring(pos, idx));
0399: pos = idx + 1;
0400: }
0401:
0402: // get host: ([^:]*)?
0403:
0404: idx = pos;
0405: while (idx < len && uri[idx] != ':')
0406: idx++;
0407: this .host = authority.substring(pos, idx);
0408: pos = idx;
0409:
0410: // get port: (:(.*))?
0411:
0412: if (pos < (len - 1) && uri[pos] == ':') {
0413: int p;
0414: try {
0415: p = Integer.parseInt(authority.substring(pos + 1, len));
0416: if (p < 0)
0417: throw new NumberFormatException();
0418: } catch (NumberFormatException e) {
0419: throw new ParseException(authority.substring(pos + 1,
0420: len)
0421: + " is an invalid port number");
0422: }
0423: if (p == defaultPort(scheme))
0424: this .port = -1;
0425: else
0426: this .port = p;
0427: }
0428: }
0429:
/**
 * Creates a URI from a java.net.URL by parsing the URL's external form
 * as an absolute URI.
 *
 * @param url the URL to convert
 * @exception ParseException if the string returned by
 *                           <code>url.toExternalForm()</code> cannot be
 *                           parsed
 */
public URI(URL url) throws ParseException {
    this((URI) null, url.toExternalForm());
}
0440:
/**
 * Creates a generic URI consisting of scheme, host and path only. No
 * port is specified, so the scheme's default port applies; userinfo,
 * query and fragment are left unset.
 *
 * @param scheme the scheme (sometimes known as protocol)
 * @param host   the host
 * @param path   the path part
 * @exception ParseException if <var>scheme</var> is null
 */
public URI(String scheme, String host, String path)
        throws ParseException {
    this(scheme, null, host, -1, path, null, null);
}
0454:
/**
 * Creates a generic URI consisting of scheme, host, port and path;
 * userinfo, query and fragment are left unset.
 *
 * @param scheme the scheme (sometimes known as protocol)
 * @param host   the host
 * @param port   the port
 * @param path   the path part
 * @exception ParseException if <var>scheme</var> is null
 */
public URI(String scheme, String host, int port, String path)
        throws ParseException {
    this(scheme, null, host, port, path, null, null);
}
0468:
/**
 * Constructs a generic URI from the given parts. Any part except for
 * the scheme may be null. All string parts are trimmed; the scheme is
 * lower-cased and the userinfo is unescaped. A port equal to the
 * scheme's default port is not stored, so that getPort() returns -1.
 *
 * @param scheme   the scheme (sometimes known as protocol)
 * @param userinfo the userinfo
 * @param host     the host
 * @param port     the port
 * @param path     the path part
 * @param query    the query string
 * @param fragment the fragment identifier
 * @exception ParseException if <var>scheme</var> is null, or if
 *                           <var>userinfo</var> contains an invalid
 *                           escape sequence
 */
public URI(String scheme, String userinfo, String host, int port,
           String path, String query, String fragment)
        throws ParseException {
    if (scheme == null)
        throw new ParseException("missing scheme");
    // fixed locale: lower-casing must not depend on the default locale
    // (e.g. a Turkish locale turns "FILE" into a string with a dotless i)
    this.scheme = scheme.trim().toLowerCase(java.util.Locale.ENGLISH);
    if (userinfo != null)
        this.userinfo = unescape(userinfo.trim());
    if (host != null)
        this.host = host.trim();
    if (port != defaultPort(scheme))
        this.port = port;
    if (path != null)
        this.path = path.trim(); // ???
    // NOTE(review): unlike in the parsing constructor, query and fragment
    // are not unescaped here - confirm which form callers pass in
    if (query != null)
        this.query = query.trim();
    if (fragment != null)
        this.fragment = fragment.trim();

    this.is_generic = true;
}
0503:
/**
 * Constructs an opaque URI from the given parts. The scheme is trimmed
 * and lower-cased; the opaque part is stored as given.
 *
 * @param scheme the scheme (sometimes known as protocol)
 * @param opaque the opaque part
 * @exception ParseException if <var>scheme</var> is null
 */
public URI(String scheme, String opaque) throws ParseException {
    if (scheme == null)
        throw new ParseException("missing scheme");
    // fixed locale: lower-casing must not depend on the default locale
    this.scheme = scheme.trim().toLowerCase(java.util.Locale.ENGLISH);
    this.opaque = opaque;

    this.is_generic = false;
}
0519:
0520: // Class Methods
0521:
0522: /**
0523: * @return true if the scheme should be parsed according to the
0524: * generic-URI syntax
0525: */
0526: public static boolean usesGenericSyntax(String scheme) {
0527: scheme = scheme.trim();
0528:
0529: if (scheme.equalsIgnoreCase("http")
0530: || scheme.equalsIgnoreCase("https")
0531: || scheme.equalsIgnoreCase("shttp")
0532: || scheme.equalsIgnoreCase("coffee")
0533: || scheme.equalsIgnoreCase("ftp")
0534: || scheme.equalsIgnoreCase("file")
0535: || scheme.equalsIgnoreCase("gopher")
0536: || scheme.equalsIgnoreCase("nntp")
0537: || scheme.equalsIgnoreCase("smtp")
0538: || scheme.equalsIgnoreCase("telnet")
0539: || scheme.equalsIgnoreCase("news")
0540: || scheme.equalsIgnoreCase("snews")
0541: || scheme.equalsIgnoreCase("hnews")
0542: || scheme.equalsIgnoreCase("rwhois")
0543: || scheme.equalsIgnoreCase("whois++")
0544: || scheme.equalsIgnoreCase("imap")
0545: || scheme.equalsIgnoreCase("pop")
0546: || scheme.equalsIgnoreCase("wais")
0547: || scheme.equalsIgnoreCase("irc")
0548: || scheme.equalsIgnoreCase("nfs")
0549: || scheme.equalsIgnoreCase("ldap")
0550: || scheme.equalsIgnoreCase("prospero")
0551: || scheme.equalsIgnoreCase("z39.50r")
0552: || scheme.equalsIgnoreCase("z39.50s")
0553: || scheme.equalsIgnoreCase("sip")
0554: || scheme.equalsIgnoreCase("sips")
0555: || scheme.equalsIgnoreCase("sipt")
0556: || scheme.equalsIgnoreCase("sipu")
0557: || scheme.equalsIgnoreCase("vemmi")
0558: || scheme.equalsIgnoreCase("videotex"))
0559: return true;
0560:
0561: /* Note: schemes which definitely don't use the generic-URI syntax
0562: * and must therefore never appear in the above list:
0563: * "urn", "mailto", "sdp", "service", "tv", "gsm-sms", "tel", "fax",
0564: * "modem", "eid", "cid", "mid", "data"
0565: */
0566: return false;
0567: }
0568:
0569: /**
0570: * Return the default port used by a given protocol.
0571: *
0572: * @param protocol the protocol
0573: * @return the port number, or 0 if unknown
0574: */
0575: public final static int defaultPort(String protocol) {
0576: String prot = protocol.trim();
0577:
0578: if (prot.equalsIgnoreCase("http")
0579: || prot.equalsIgnoreCase("shttp")
0580: || prot.equalsIgnoreCase("http-ng")
0581: || prot.equalsIgnoreCase("coffee"))
0582: return 80;
0583: else if (prot.equalsIgnoreCase("https"))
0584: return 443;
0585: else if (prot.equalsIgnoreCase("ftp"))
0586: return 21;
0587: else if (prot.equalsIgnoreCase("telnet"))
0588: return 23;
0589: else if (prot.equalsIgnoreCase("nntp")
0590: || prot.equalsIgnoreCase("news"))
0591: return 119;
0592: else if (prot.equalsIgnoreCase("snews"))
0593: return 563;
0594: else if (prot.equalsIgnoreCase("hnews"))
0595: return 80;
0596: else if (prot.equalsIgnoreCase("smtp"))
0597: return 25;
0598: else if (prot.equalsIgnoreCase("gopher"))
0599: return 70;
0600: else if (prot.equalsIgnoreCase("wais"))
0601: return 210;
0602: else if (prot.equalsIgnoreCase("whois"))
0603: return 43;
0604: else if (prot.equalsIgnoreCase("whois++"))
0605: return 63;
0606: else if (prot.equalsIgnoreCase("rwhois"))
0607: return 4321;
0608: else if (prot.equalsIgnoreCase("imap"))
0609: return 143;
0610: else if (prot.equalsIgnoreCase("pop"))
0611: return 110;
0612: else if (prot.equalsIgnoreCase("prospero"))
0613: return 1525;
0614: else if (prot.equalsIgnoreCase("irc"))
0615: return 194;
0616: else if (prot.equalsIgnoreCase("ldap"))
0617: return 389;
0618: else if (prot.equalsIgnoreCase("nfs"))
0619: return 2049;
0620: else if (prot.equalsIgnoreCase("z39.50r")
0621: || prot.equalsIgnoreCase("z39.50s"))
0622: return 210;
0623: else if (prot.equalsIgnoreCase("vemmi"))
0624: return 575;
0625: else if (prot.equalsIgnoreCase("videotex"))
0626: return 516;
0627: else
0628: return 0;
0629: }
0630:
0631: // Instance Methods
0632:
0633: /**
0634: * @return the scheme (often also referred to as protocol)
0635: */
0636: public String getScheme() {
0637: return scheme;
0638: }
0639:
0640: /**
0641: * @return the opaque part, or null if this URI is generic
0642: */
0643: public String getOpaque() {
0644: return opaque;
0645: }
0646:
0647: /**
0648: * @return the host
0649: */
0650: public String getHost() {
0651: return host;
0652: }
0653:
0654: /**
0655: * @return the port, or -1 if it's the default port
0656: */
0657: public int getPort() {
0658: return port;
0659: }
0660:
0661: /**
0662: * @return the user info
0663: */
0664: public String getUserinfo() {
0665: return userinfo;
0666: }
0667:
0668: /**
0669: * @return the path; this includes the query string
0670: */
0671: public String getPath() {
0672: if (query != null)
0673: if (path != null)
0674: return path + "?" + query;
0675: else
0676: return "?" + query;
0677: return path;
0678: }
0679:
0680: /**
0681: * @return the query string
0682: */
0683: public String getQueryString() {
0684: return query;
0685: }
0686:
0687: /**
0688: * @return the fragment
0689: */
0690: public String getFragment() {
0691: return fragment;
0692: }
0693:
0694: /**
0695: * Does the scheme specific part of this URI use the generic-URI syntax?
0696: *
0697: * <P>In general URI are split into two categories: opaque-URI and
0698: * generic-URI. The generic-URI syntax is the syntax most are familiar
0699: * with from URLs such as ftp- and http-URLs, which is roughly:
0700: * <PRE>
0701: * generic-URI = scheme ":" [ "//" server ] [ "/" ] [ path_segments ] [ "?" query ]
0702: * </PRE>
0703: * (see draft-fielding-uri-syntax-03 for exact syntax). Only URLs
0704: * using the generic-URI syntax can be used to create and resolve
0705: * relative URIs.
0706: *
0707: * <P>Whether a given scheme is parsed according to the generic-URI
0708: * syntax or wether it is treated as opaque is determined by an internal
0709: * table of URI schemes.
0710: *
0711: * @see <A HREF="http://www.ics.uci.edu/pub/ietf/uri/rfc2396.txt">rfc-2396</A>
0712: */
0713: public boolean isGenericURI() {
0714: return is_generic;
0715: }
0716:
0717: /**
0718: * Will try to create a java.net.URL object from this URI.
0719: *
0720: * @return the URL
0721: * @exception MalformedURLException if no handler is available for the
0722: * scheme
0723: */
0724: public URL toURL() throws MalformedURLException {
0725: if (url != null)
0726: return url;
0727:
0728: if (opaque != null)
0729: return (url = new URL(scheme + ":" + opaque));
0730:
0731: String hostinfo;
0732: if (userinfo != null && host != null)
0733: hostinfo = userinfo + "@" + host;
0734: else if (userinfo != null)
0735: hostinfo = userinfo + "@";
0736: else
0737: hostinfo = host;
0738:
0739: StringBuffer file = new StringBuffer(100);
0740:
0741: if (path != null)
0742: file.append(escape(path.toCharArray(), uricChar));
0743:
0744: if (query != null) {
0745: file.append('?');
0746: file.append(escape(query.toCharArray(), uricChar));
0747: }
0748:
0749: if (fragment != null) {
0750: file.append('#');
0751: file.append(escape(fragment.toCharArray(), uricChar));
0752: }
0753:
0754: url = new URL(scheme, hostinfo, port, file.toString());
0755: return url;
0756: }
0757:
0758: /**
0759: * @return a string representation of this URI suitable for use in
0760: * links, headers, etc.
0761: */
0762: public String toExternalForm() {
0763: StringBuffer uri = new StringBuffer(100);
0764:
0765: if (scheme != null) {
0766: uri.append(escape(scheme.toCharArray(), schemeChar));
0767: uri.append(':');
0768: }
0769:
0770: if (opaque != null) // it's an opaque-uri
0771: {
0772: uri.append(escape(opaque.toCharArray(), uricChar));
0773: return uri.toString();
0774: }
0775:
0776: if (userinfo != null || host != null || port != -1)
0777: uri.append("//");
0778:
0779: if (userinfo != null) {
0780: uri.append(escape(userinfo.toCharArray(), userinfoChar));
0781: uri.append('@');
0782: }
0783:
0784: if (host != null)
0785: uri.append(host.toCharArray());
0786:
0787: if (port != -1) {
0788: uri.append(':');
0789: uri.append(port);
0790: }
0791:
0792: if (path != null)
0793: uri.append(path.toCharArray());
0794:
0795: if (query != null) {
0796: uri.append('?');
0797: uri.append(escape(query.toCharArray(), uricChar));
0798: }
0799:
0800: if (fragment != null) {
0801: uri.append('#');
0802: uri.append(escape(fragment.toCharArray(), uricChar));
0803: }
0804:
0805: return uri.toString();
0806: }
0807:
0808: /**
0809: * @see #toExternalForm
0810: */
0811: public String toString() {
0812: return toExternalForm();
0813: }
0814:
0815: /**
0816: * @return true if <var>other</var> is either a URI or URL and it
0817: * matches the current URI
0818: */
0819: public boolean equals(Object other) {
0820: if (other instanceof URI) {
0821: URI o = (URI) other;
0822: return (scheme.equals(o.scheme) && (!is_generic
0823: && (opaque == null && o.opaque == null || opaque != null
0824: && o.opaque != null
0825: && opaque.equals(o.opaque)) ||
0826:
0827: is_generic
0828: && (userinfo == null && o.userinfo == null || userinfo != null
0829: && o.userinfo != null
0830: && userinfo.equals(o.userinfo))
0831: && (host == null && o.host == null || host != null
0832: && o.host != null
0833: && host.equalsIgnoreCase(o.host))
0834: && port == o.port
0835: && (path == null && o.path == null || path != null
0836: && o.path != null
0837: && unescapeNoPE(path).equals(
0838: unescapeNoPE(o.path)))
0839: && (query == null && o.query == null || query != null
0840: && o.query != null
0841: && unescapeNoPE(query).equals(
0842: unescapeNoPE(o.query)))
0843: && (fragment == null && o.fragment == null || fragment != null
0844: && o.fragment != null
0845: && unescapeNoPE(fragment).equals(
0846: unescapeNoPE(o.fragment)))));
0847: }
0848:
0849: if (other instanceof URL) {
0850: URL o = (URL) other;
0851: String h, f;
0852:
0853: if (userinfo != null)
0854: h = userinfo + "@" + host;
0855: else
0856: h = host;
0857:
0858: if (query != null)
0859: f = path + "?" + query;
0860: else
0861: f = path;
0862:
0863: return (scheme.equalsIgnoreCase(o.getProtocol()) && (!is_generic
0864: && opaque.equals(o.getFile()) || is_generic
0865: && (h == null && o.getHost() == null || h != null
0866: && o.getHost() != null
0867: && h.equalsIgnoreCase(o.getHost()))
0868: && (port == o.getPort() || o.getPort() == defaultPort(scheme))
0869: && (f == null && o.getFile() == null || f != null
0870: && o.getFile() != null
0871: && unescapeNoPE(f).equals(
0872: unescapeNoPE(o.getFile())))
0873: && (fragment == null && o.getRef() == null || fragment != null
0874: && o.getRef() != null
0875: && unescapeNoPE(fragment).equals(
0876: unescapeNoPE(o.getRef())))));
0877: }
0878:
0879: return false;
0880: }
0881:
0882: /**
0883: * Escape any character not in the given character class.
0884: *
0885: * @param elem the array of characters to escape
0886: * @param allowed_char the BitSet of all allowed characters
0887: * @return the elem array with all characters not in allowed_char
0888: * escaped
0889: */
0890: private static char[] escape(char[] elem, BitSet allowed_char) {
0891: int cnt = 0;
0892: for (int idx = 0; idx < elem.length; idx++)
0893: if (!allowed_char.get(elem[idx]))
0894: cnt++;
0895:
0896: if (cnt == 0)
0897: return elem;
0898:
0899: char[] tmp = new char[elem.length + 2 * cnt];
0900: for (int idx = 0, pos = 0; idx < elem.length; idx++, pos++) {
0901: if (allowed_char.get(elem[idx]))
0902: tmp[pos] = elem[idx];
0903: else {
0904: if (elem[idx] > 255)
0905: throw new RuntimeException(
0906: "Can't handle non 8-bt chars");
0907: tmp[pos++] = '%';
0908: tmp[pos++] = hex[(elem[idx] >> 4) & 0xf];
0909: tmp[pos] = hex[elem[idx] & 0xf];
0910: }
0911: }
0912:
0913: return tmp;
0914: }
0915:
0916: private static final char[] hex = { '0', '1', '2', '3', '4', '5',
0917: '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
0918:
0919: /**
0920: * Unescape escaped characters (i.e. %xx).
0921: *
0922: * @param str the string to unescape
0923: * @return the unescaped string
0924: * @exception ParseException if the two digits following a `%' are
0925: * not a valid hex number
0926: */
0927: static final String unescape(String str) throws ParseException {
0928: if (str == null || str.indexOf('%') == -1)
0929: return str; // an optimization
0930:
0931: char[] buf = str.toCharArray();
0932: char[] res = new char[buf.length];
0933:
0934: int didx = 0;
0935: for (int sidx = 0; sidx < buf.length; sidx++, didx++) {
0936: if (buf[sidx] == '%') {
0937: int ch;
0938: try {
0939: ch = Integer.parseInt(str.substring(sidx + 1,
0940: sidx + 3), 16);
0941: if (ch < 0)
0942: throw new NumberFormatException();
0943: } catch (NumberFormatException e) {
0944: throw new ParseException(str.substring(sidx,
0945: sidx + 3)
0946: + " is an invalid code");
0947: }
0948: res[didx] = (char) ch;
0949: sidx += 2;
0950: } else
0951: res[didx] = buf[sidx];
0952: }
0953:
0954: return new String(res, 0, didx);
0955: }
0956:
0957: /**
0958: * Unescape escaped characters (i.e. %xx). If a ParseException would
0959: * be thrown then just return the original string.
0960: *
0961: * @param str the string to unescape
0962: * @return the unescaped string, or the original string if unescaping
0963: * would throw a ParseException
0964: * @see #unescape(java.lang.String)
0965: */
0966: private static final String unescapeNoPE(String str) {
0967: try {
0968: return unescape(str);
0969: } catch (ParseException pe) {
0970: return str;
0971: }
0972: }
0973:
0974: /**
0975: * Run test set.
0976: *
0977: * @exception Exception if any test fails
0978: */
0979: public static void main(String args[]) throws Exception {
0980: System.err.println();
0981: System.err.println("*** URI Tests ...");
0982:
0983: /* Relative URI test set, taken from Section C of rfc-2396 and
0984: * Roy's test1. All Roy's URI parser tests can be found at
0985: * http://www.ics.uci.edu/~fielding/url/
0986: */
0987:
0988: URI base = new URI("http://a/b/c/d;p?q");
0989:
0990: // normal examples
0991: testParser(base, "g:h", "g:h");
0992: testParser(base, "g", "http://a/b/c/g");
0993: testParser(base, "./g", "http://a/b/c/g");
0994: testParser(base, "g/", "http://a/b/c/g/");
0995: testParser(base, "/g", "http://a/g");
0996: testParser(base, "//g", "http://g");
0997: testParser(base, "?y", "http://a/b/c/?y");
0998: testParser(base, "g?y", "http://a/b/c/g?y");
0999: testParser(base, "#s", "http://a/b/c/d;p?q#s");
1000: testParser(base, "g#s", "http://a/b/c/g#s");
1001: testParser(base, "g?y#s", "http://a/b/c/g?y#s");
1002: testParser(base, ";x", "http://a/b/c/;x");
1003: testParser(base, "g;x", "http://a/b/c/g;x");
1004: testParser(base, "g;x?y#s", "http://a/b/c/g;x?y#s");
1005: testParser(base, ".", "http://a/b/c/");
1006: testParser(base, "./", "http://a/b/c/");
1007: testParser(base, "..", "http://a/b/");
1008: testParser(base, "../", "http://a/b/");
1009: testParser(base, "../g", "http://a/b/g");
1010: testParser(base, "../..", "http://a/");
1011: testParser(base, "../../", "http://a/");
1012: testParser(base, "../../g", "http://a/g");
1013:
1014: // abnormal examples
1015: testParser(base, "", "http://a/b/c/d;p?q");
1016: testParser(base, "/./g", "http://a/./g");
1017: testParser(base, "/../g", "http://a/../g");
1018: testParser(base, "../../../g", "http://a/../g");
1019: testParser(base, "../../../../g", "http://a/../../g");
1020: testParser(base, "g.", "http://a/b/c/g.");
1021: testParser(base, ".g", "http://a/b/c/.g");
1022: testParser(base, "g..", "http://a/b/c/g..");
1023: testParser(base, "..g", "http://a/b/c/..g");
1024: testParser(base, "./../g", "http://a/b/g");
1025: testParser(base, "./g/.", "http://a/b/c/g/");
1026: testParser(base, "g/./h", "http://a/b/c/g/h");
1027: testParser(base, "g/../h", "http://a/b/c/h");
1028: testParser(base, "g;x=1/./y", "http://a/b/c/g;x=1/y");
1029: testParser(base, "g;x=1/../y", "http://a/b/c/y");
1030: testParser(base, "g?y/./x", "http://a/b/c/g?y/./x");
1031: testParser(base, "g?y/../x", "http://a/b/c/g?y/../x");
1032: testParser(base, "g#s/./x", "http://a/b/c/g#s/./x");
1033: testParser(base, "g#s/../x", "http://a/b/c/g#s/../x");
1034: testParser(base, "http:g", "http:g");
1035: testParser(base, "http:", "http:");
1036: testParser(base, "./g:h", "http://a/b/c/g:h");
1037:
1038: /* Roy's test2
1039: */
1040: base = new URI("http://a/b/c/d;p?q=1/2");
1041:
1042: testParser(base, "g", "http://a/b/c/g");
1043: testParser(base, "./g", "http://a/b/c/g");
1044: testParser(base, "g/", "http://a/b/c/g/");
1045: testParser(base, "/g", "http://a/g");
1046: testParser(base, "//g", "http://g");
1047: testParser(base, "?y", "http://a/b/c/?y");
1048: testParser(base, "g?y", "http://a/b/c/g?y");
1049: testParser(base, "g?y/./x", "http://a/b/c/g?y/./x");
1050: testParser(base, "g?y/../x", "http://a/b/c/g?y/../x");
1051: testParser(base, "g#s", "http://a/b/c/g#s");
1052: testParser(base, "g#s/./x", "http://a/b/c/g#s/./x");
1053: testParser(base, "g#s/../x", "http://a/b/c/g#s/../x");
1054: testParser(base, "./", "http://a/b/c/");
1055: testParser(base, "../", "http://a/b/");
1056: testParser(base, "../g", "http://a/b/g");
1057: testParser(base, "../../", "http://a/");
1058: testParser(base, "../../g", "http://a/g");
1059:
1060: /* Roy's test3
1061: */
1062: base = new URI("http://a/b/c/d;p=1/2?q");
1063:
1064: testParser(base, "g", "http://a/b/c/d;p=1/g");
1065: testParser(base, "./g", "http://a/b/c/d;p=1/g");
1066: testParser(base, "g/", "http://a/b/c/d;p=1/g/");
1067: testParser(base, "g?y", "http://a/b/c/d;p=1/g?y");
1068: testParser(base, ";x", "http://a/b/c/d;p=1/;x");
1069: testParser(base, "g;x", "http://a/b/c/d;p=1/g;x");
1070: testParser(base, "g;x=1/./y", "http://a/b/c/d;p=1/g;x=1/y");
1071: testParser(base, "g;x=1/../y", "http://a/b/c/d;p=1/y");
1072: testParser(base, "./", "http://a/b/c/d;p=1/");
1073: testParser(base, "../", "http://a/b/c/");
1074: testParser(base, "../g", "http://a/b/c/g");
1075: testParser(base, "../../", "http://a/b/");
1076: testParser(base, "../../g", "http://a/b/g");
1077:
1078: /* Roy's test4
1079: */
1080: base = new URI("fred:///s//a/b/c");
1081:
1082: testParser(base, "g:h", "g:h");
/* we have to skip these, as usesGenericSyntax("fred") returns false
 * and we therefore don't parse relative URIs here. But test5 is
 * the same except that the http scheme is used.
1086: testParser(base, "g", "fred:///s//a/b/g");
1087: testParser(base, "./g", "fred:///s//a/b/g");
1088: testParser(base, "g/", "fred:///s//a/b/g/");
1089: testParser(base, "/g", "fred:///g");
1090: testParser(base, "//g", "fred://g");
1091: testParser(base, "//g/x", "fred://g/x");
1092: testParser(base, "///g", "fred:///g");
1093: testParser(base, "./", "fred:///s//a/b/");
1094: testParser(base, "../", "fred:///s//a/");
1095: testParser(base, "../g", "fred:///s//a/g");
1096: testParser(base, "../../", "fred:///s//");
1097: testParser(base, "../../g", "fred:///s//g");
1098: testParser(base, "../../../g", "fred:///s/g");
1099: testParser(base, "../../../../g", "fred:///g");
1100: */
1101: testPE(base, "g");
1102:
1103: /* Roy's test5
1104: */
1105: base = new URI("http:///s//a/b/c");
1106:
1107: testParser(base, "g:h", "g:h");
1108: testParser(base, "g", "http:///s//a/b/g");
1109: testParser(base, "./g", "http:///s//a/b/g");
1110: testParser(base, "g/", "http:///s//a/b/g/");
1111: testParser(base, "/g", "http:///g");
1112: testParser(base, "//g", "http://g");
1113: testParser(base, "//g/x", "http://g/x");
1114: testParser(base, "///g", "http:///g");
1115: testParser(base, "./", "http:///s//a/b/");
1116: testParser(base, "../", "http:///s//a/");
1117: testParser(base, "../g", "http:///s//a/g");
1118: testParser(base, "../../", "http:///s//");
1119: testParser(base, "../../g", "http:///s//g");
1120: testParser(base, "../../../g", "http:///s/g");
1121: testParser(base, "../../../../g", "http:///g");
1122:
1123: /* equality tests */
1124:
1125: // protocol
1126: testNotEqual("http://a/", "nntp://a/");
1127: testNotEqual("http://a/", "https://a/");
1128: testNotEqual("http://a/", "shttp://a/");
1129: testEqual("http://a/", "Http://a/");
1130: testEqual("http://a/", "hTTP://a/");
1131: testEqual("url:http://a/", "hTTP://a/");
1132: testEqual("urI:http://a/", "hTTP://a/");
1133:
1134: // host
1135: testEqual("http://a/", "Http://A/");
1136: testEqual("http://a.b.c/", "Http://A.b.C/");
1137: testEqual("http:///", "Http:///");
1138: testNotEqual("http:///", "Http://a/");
1139:
1140: // port
1141: testEqual("http://a.b.c/", "Http://A.b.C:80/");
1142: testEqual("http://a.b.c:/", "Http://A.b.C:80/");
1143: testEqual("nntp://a", "nntp://a:119");
1144: testEqual("nntp://a:", "nntp://a:119");
1145: testEqual("nntp://a/", "nntp://a:119/");
1146: testNotEqual("nntp://a", "nntp://a:118");
1147: testNotEqual("nntp://a", "nntp://a:0");
1148: testNotEqual("nntp://a:", "nntp://a:0");
1149: testEqual("telnet://:23/", "telnet:///");
1150: testPE(null, "ftp://:a/");
1151: testPE(null, "ftp://:-1/");
1152: testPE(null, "ftp://::1/");
1153:
1154: // userinfo
1155: testNotEqual("ftp://me@a", "ftp://a");
1156: testNotEqual("ftp://me@a", "ftp://Me@a");
1157: testEqual("ftp://Me@a", "ftp://Me@a");
1158: testEqual("ftp://Me:My@a:21", "ftp://Me:My@a");
1159: testEqual("ftp://Me:My@a:", "ftp://Me:My@a");
1160: testNotEqual("ftp://Me:My@a:21", "ftp://Me:my@a");
1161: testNotEqual("ftp://Me:My@a:", "ftp://Me:my@a");
1162:
1163: // path
1164: testEqual("ftp://a/b%2b/", "ftp://a/b+/");
1165: testEqual("ftp://a/b%2b/", "ftp://a/b+/");
1166: testEqual("ftp://a/b%5E/", "ftp://a/b^/");
1167: testNotEqual("ftp://a/b%3f/", "ftp://a/b?/");
1168:
1169: System.err.println("*** Tests finished successfuly");
1170: }
1171:
1172: private static void testParser(URI base, String relURI,
1173: String result) throws Exception {
1174: if (!(new URI(base, relURI).toString().equals(result))) {
1175: String nl = System.getProperty("line.separator");
1176: throw new Exception("Test failed: " + nl + " base-URI = <"
1177: + base + ">" + nl + " rel-URI = <" + relURI + ">"
1178: + nl + " expected <" + result + ">" + nl
1179: + " but got <" + new URI(base, relURI) + ">");
1180: }
1181: }
1182:
1183: private static void testEqual(String one, String two)
1184: throws Exception {
1185: if (!(new URI(one).equals(new URI(two)))) {
1186: String nl = System.getProperty("line.separator");
1187: throw new Exception("Test failed: " + nl + " <" + one
1188: + "> != <" + two + ">");
1189: }
1190: }
1191:
1192: private static void testNotEqual(String one, String two)
1193: throws Exception {
1194: if ((new URI(one).equals(new URI(two)))) {
1195: String nl = System.getProperty("line.separator");
1196: throw new Exception("Test failed: " + nl + " <" + one
1197: + "> == <" + two + ">");
1198: }
1199: }
1200:
1201: private static void testPE(URI base, String uri) throws Exception {
1202: boolean got_pe = false;
1203: try {
1204: new URI(base, uri);
1205: } catch (ParseException pe) {
1206: got_pe = true;
1207: }
1208: if (!got_pe) {
1209: String nl = System.getProperty("line.separator");
1210: throw new Exception("Test failed: " + nl + " <" + uri
1211: + "> should be invalid");
1212: }
1213: }
1214: }
|