0001: /*
0002: * The Apache Software License, Version 1.1
0003: *
0004: *
0005: * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
0006: * reserved.
0007: *
0008: * Redistribution and use in source and binary forms, with or without
0009: * modification, are permitted provided that the following conditions
0010: * are met:
0011: *
0012: * 1. Redistributions of source code must retain the above copyright
0013: * notice, this list of conditions and the following disclaimer.
0014: *
0015: * 2. Redistributions in binary form must reproduce the above copyright
0016: * notice, this list of conditions and the following disclaimer in
0017: * the documentation and/or other materials provided with the
0018: * distribution.
0019: *
0020: * 3. The end-user documentation included with the redistribution,
0021: * if any, must include the following acknowledgment:
0022: * "This product includes software developed by the
0023: * Apache Software Foundation (http://www.apache.org/)."
0024: * Alternately, this acknowledgment may appear in the software itself,
0025: * if and wherever such third-party acknowledgments normally appear.
0026: *
0027: * 4. The names "Xerces" and "Apache Software Foundation" must
0028: * not be used to endorse or promote products derived from this
0029: * software without prior written permission. For written
0030: * permission, please contact apache@apache.org.
0031: *
0032: * 5. Products derived from this software may not be called "Apache",
0033: * nor may "Apache" appear in their name, without prior written
0034: * permission of the Apache Software Foundation.
0035: *
0036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: * SUCH DAMAGE.
0048: * ====================================================================
0049: *
0050: * This software consists of voluntary contributions made by many
0051: * individuals on behalf of the Apache Software Foundation and was
0052: * originally based on software copyright (c) 1999, iClick Inc.,
0053: * http://www.apache.org. For more information on the Apache Software
0054: * Foundation, please see <http://www.apache.org/>.
0055: */
0056:
0057: package com.sun.xml.stream.xerces.util;
0058:
0059: import java.io.IOException;
0060: import java.io.Serializable;
0061:
0062: /**********************************************************************
0063: * A class to represent a Uniform Resource Identifier (URI). This class
0064: * is designed to handle the parsing of URIs and provide access to
0065: * the various components (scheme, host, port, userinfo, path, query
0066: * string and fragment) that may constitute a URI.
0067: * <p>
0068: * Parsing of a URI specification is done according to the URI
0069: * syntax described in RFC 2396
0070: * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
0071: * of a scheme, followed by a colon (':'), followed by a scheme-specific
0072: * part. For URIs that follow the "generic URI" syntax, the scheme-
0073: * specific part begins with two slashes ("//") and may be followed
0074: * by an authority segment (comprised of user information, host, and
0075: * port), path segment, query segment and fragment. Note that RFC 2396
0076: * no longer specifies the use of the parameters segment and excludes
0077: * the "user:password" syntax as part of the authority segment. If
0078: * "user:password" appears in a URI, the entire user/password string
0079: * is stored as userinfo.
0080: * <p>
0081: * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
0082: * the entire scheme-specific part is treated as the "path" portion
0083: * of the URI.
0084: * <p>
0085: * Note that, unlike the java.net.URL class, this class does not provide
0086: * any built-in network access functionality nor does it provide any
0087: * scheme-specific functionality (for example, it does not know a
0088: * default port for a specific scheme). Rather, it only knows the
0089: * grammar and basic set of operations that can be applied to a URI.
0090: *
0091: * @version $Id: URI.java,v 1.2 2006/04/01 06:01:40 jeffsuttor Exp $
0092: *
0093: **********************************************************************/
0094: public class URI implements Serializable {
0095:
0096: /*******************************************************************
0097: * MalformedURIExceptions are thrown in the process of building a URI
0098: * or setting fields on a URI when an operation would result in an
0099: * invalid URI specification.
0100: *
0101: ********************************************************************/
0102: public static class MalformedURIException extends IOException {
0103:
0104: /******************************************************************
0105: * Constructs a <code>MalformedURIException</code> with no specified
0106: * detail message.
0107: ******************************************************************/
0108: public MalformedURIException() {
0109: super ();
0110: }
0111:
0112: /*****************************************************************
0113: * Constructs a <code>MalformedURIException</code> with the
0114: * specified detail message.
0115: *
0116: * @param p_msg the detail message.
0117: ******************************************************************/
0118: public MalformedURIException(String p_msg) {
0119: super (p_msg);
0120: }
0121: }
0122:
/**
 * Reserved characters. The RFC 2396 set is ";/?:@&=+$,"; RFC 2732 added
 * '[' and ']' as reserved characters.
 */
private static final String RESERVED_CHARACTERS = ";/?:@&=+$,[]";

/**
 * URI punctuation mark characters - these, combined with alphanumerics,
 * constitute the "unreserved" characters.
 */
private static final String MARK_CHARACTERS = "-_.!~*'()";

/** A scheme can be composed of alphanumerics and these characters. */
private static final String SCHEME_CHARACTERS = "+-.";

/**
 * Userinfo can be composed of unreserved characters, escape sequences
 * and these characters.
 */
private static final String USERINFO_CHARACTERS = ";:&=+$,";

/** Stores the scheme (usually the protocol) for this URI. */
private String m_scheme = null;

/** If specified, stores the userinfo for this URI; otherwise null. */
private String m_userinfo = null;

/** If specified, stores the host for this URI; otherwise null. */
private String m_host = null;

/** If specified, stores the port for this URI; otherwise -1. */
private int m_port = -1;

/** If specified, stores the path for this URI; otherwise null. */
private String m_path = null;

/** If specified, stores the query string for this URI; otherwise null. */
private String m_queryString = null;

/** If specified, stores the fragment for this URI; otherwise null. */
private String m_fragment = null;

/**
 * Debug switch, off by default.
 * NOTE(review): not referenced in the part of the file reviewed here -
 * confirm whether it is still used before relying on it.
 */
private static boolean DEBUG = false;
0162:
/**
 * Creates a new, uninitialized URI. Every component keeps its
 * "unspecified" default (null, or -1 for the port) until it is set.
 */
public URI() {
    super();
}
0168:
/**
 * Copy constructor: every field of the new URI is taken from the
 * corresponding getter of {@code p_other}.
 *
 * @param p_other the URI to copy (cannot be null)
 */
public URI(URI p_other) {
    this.initialize(p_other);
}
0178:
/**
 * Creates a URI by parsing a URI specification string, with no base URI
 * to resolve against. If the specification follows the "generic URI"
 * syntax (two slashes following the first colon), it is parsed into
 * scheme, userinfo, host, port, path, query string and fragment as
 * necessary. Otherwise it is parsed into a scheme and a scheme-specific
 * part (stored as the path) only.
 *
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty)
 *
 * @exception MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public URI(String p_uriSpec) throws MalformedURIException {
    // same work as the (base, spec) constructor with a null base
    initialize((URI) null, p_uriSpec);
}
0197:
/**
 * Creates a URI from a base URI and a URI specification string, which
 * may be a relative URI.
 *
 * @param p_base the base URI (cannot be null if p_uriSpec is null or
 *               empty)
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty if p_base is null)
 *
 * @exception MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public URI(URI p_base, String p_uriSpec) throws MalformedURIException {
    this.initialize(p_base, p_uriSpec);
}
0214:
/**
 * Creates a URI that does not follow the generic URI syntax. Only the
 * scheme and the scheme-specific part (stored as the path) are set.
 *
 * @param p_scheme the URI scheme (cannot be null or empty)
 * @param p_schemeSpecificPart the scheme-specific part (cannot be
 *                             null or empty)
 *
 * @exception MalformedURIException if either argument is null or blank,
 *                                  or if a setter rejects its value
 */
public URI(String p_scheme, String p_schemeSpecificPart)
        throws MalformedURIException {
    boolean schemeMissing =
            (p_scheme == null) || (p_scheme.trim().length() == 0);
    if (schemeMissing) {
        throw new MalformedURIException(
                "Cannot construct URI with null/empty scheme!");
    }

    boolean partMissing = (p_schemeSpecificPart == null)
            || (p_schemeSpecificPart.trim().length() == 0);
    if (partMissing) {
        throw new MalformedURIException(
                "Cannot construct URI with null/empty scheme-specific part!");
    }

    setScheme(p_scheme);
    setPath(p_schemeSpecificPart);
}
0241:
/**
 * Creates a generic-syntax URI from its component parts, without
 * userinfo and without a port. Delegates to the seven-argument
 * constructor, which performs all validation.
 *
 * @param p_scheme the URI scheme (cannot be null or empty)
 * @param p_host the hostname or IPv4 address for the URI
 * @param p_path the URI path - if the path contains '?' or '#',
 *               then the query string and/or fragment will be
 *               set from the path; however, if the query and
 *               fragment are specified both in the path and as
 *               separate parameters, an exception is thrown
 * @param p_queryString the URI query string (cannot be specified
 *                      if path is null)
 * @param p_fragment the URI fragment (cannot be specified if path
 *                   is null)
 *
 * @exception MalformedURIException if any of the parameters violates
 *                                  syntax rules or semantic rules
 */
public URI(String p_scheme, String p_host, String p_path,
        String p_queryString, String p_fragment)
        throws MalformedURIException {
    // userinfo is passed as null and the port as -1 ("unspecified")
    this(p_scheme, (String) null, p_host, -1,
            p_path, p_queryString, p_fragment);
}
0269:
/**
 * Creates a generic-syntax URI from its component parts. Cross-field
 * consistency is checked here first; each component is then validated
 * by its own setter.
 *
 * @param p_scheme the URI scheme (cannot be null or empty)
 * @param p_userinfo the URI userinfo (cannot be specified if host
 *                   is null)
 * @param p_host the hostname or IPv4 address for the URI
 * @param p_port the URI port (may be -1 for "unspecified"; cannot
 *               be specified if host is null)
 * @param p_path the URI path - if the path contains '?' or '#',
 *               then the query string and/or fragment will be
 *               set from the path; however, if the query and
 *               fragment are specified both in the path and as
 *               separate parameters, an exception is thrown
 * @param p_queryString the URI query string (cannot be specified
 *                      if path is null)
 * @param p_fragment the URI fragment (cannot be specified if path
 *                   is null)
 *
 * @exception MalformedURIException if any of the parameters violates
 *                                  syntax rules or semantic rules
 */
public URI(String p_scheme, String p_userinfo, String p_host,
        int p_port, String p_path, String p_queryString,
        String p_fragment) throws MalformedURIException {
    if (p_scheme == null || p_scheme.trim().length() == 0) {
        throw new MalformedURIException("Scheme is required!");
    }

    // userinfo and port only make sense together with a host
    if (p_host == null && p_userinfo != null) {
        throw new MalformedURIException(
                "Userinfo may not be specified if host is not specified!");
    }
    if (p_host == null && p_port != -1) {
        throw new MalformedURIException(
                "Port may not be specified if host is not specified!");
    }

    // query and fragment may be given in the path or separately, not both
    if (p_path != null && p_path.indexOf('?') != -1
            && p_queryString != null) {
        throw new MalformedURIException(
                "Query string cannot be specified in path and query string!");
    }
    if (p_path != null && p_path.indexOf('#') != -1
            && p_fragment != null) {
        throw new MalformedURIException(
                "Fragment cannot be specified in both the path and fragment!");
    }

    // the host goes in before port and userinfo; setUserinfo rejects a
    // non-null value while the host is still null
    setScheme(p_scheme);
    setHost(p_host);
    setPort(p_port);
    setUserinfo(p_userinfo);
    setPath(p_path);
    setQueryString(p_queryString);
    setFragment(p_fragment);
}
0333:
0334: /**
0335: * Initialize all fields of this URI from another URI.
0336: *
0337: * @param p_other the URI to copy (cannot be null)
0338: */
0339: private void initialize(URI p_other) {
0340: m_scheme = p_other.getScheme();
0341: m_userinfo = p_other.getUserinfo();
0342: m_host = p_other.getHost();
0343: m_port = p_other.getPort();
0344: m_path = p_other.getPath();
0345: m_queryString = p_other.getQueryString();
0346: m_fragment = p_other.getFragment();
0347: }
0348:
0349: /**
0350: * Initializes this URI from a base URI and a URI specification string.
0351: * See RFC 2396 Section 4 and Appendix B for specifications on parsing
0352: * the URI and Section 5 for specifications on resolving relative URIs
0353: * and relative paths.
0354: *
0355: * @param p_base the base URI (may be null if p_uriSpec is an absolute
0356: * URI)
0357: * @param p_uriSpec the URI spec string which may be an absolute or
0358: * relative URI (can only be null/empty if p_base
0359: * is not null)
0360: *
0361: * @exception MalformedURIException if p_base is null and p_uriSpec
0362: * is not an absolute URI or if
0363: * p_uriSpec violates syntax rules
0364: */
0365: private void initialize(URI p_base, String p_uriSpec)
0366: throws MalformedURIException {
0367: if (p_base == null
0368: && (p_uriSpec == null || p_uriSpec.trim().length() == 0)) {
0369: throw new MalformedURIException(
0370: "Cannot initialize URI with empty parameters.");
0371: }
0372:
0373: // just make a copy of the base if spec is empty
0374: if (p_uriSpec == null || p_uriSpec.trim().length() == 0) {
0375: initialize(p_base);
0376: return;
0377: }
0378:
0379: String uriSpec = p_uriSpec.trim();
0380: int uriSpecLen = uriSpec.length();
0381: int index = 0;
0382:
0383: // Check for scheme, which must be before '/', '?' or '#'. Also handle
0384: // names with DOS drive letters ('D:'), so 1-character schemes are not
0385: // allowed.
0386: int colonIdx = uriSpec.indexOf(':');
0387: int slashIdx = uriSpec.indexOf('/');
0388: int queryIdx = uriSpec.indexOf('?');
0389: int fragmentIdx = uriSpec.indexOf('#');
0390:
0391: if ((colonIdx < 2) || (colonIdx > slashIdx && slashIdx != -1)
0392: || (colonIdx > queryIdx && queryIdx != -1)
0393: || (colonIdx > fragmentIdx && fragmentIdx != -1)) {
0394: // A standalone base is a valid URI according to spec
0395: if (p_base == null && fragmentIdx != 0) {
0396: throw new MalformedURIException(
0397: "No scheme found in URI.");
0398: }
0399: } else {
0400: initializeScheme(uriSpec);
0401: index = m_scheme.length() + 1;
0402: }
0403:
0404: // two slashes means generic URI syntax, so we get the authority
0405: if (((index + 1) < uriSpecLen)
0406: && (uriSpec.substring(index).startsWith("//"))) {
0407: index += 2;
0408: int startPos = index;
0409:
0410: // get authority - everything up to path, query or fragment
0411: char testChar = '\0';
0412: while (index < uriSpecLen) {
0413: testChar = uriSpec.charAt(index);
0414: if (testChar == '/' || testChar == '?'
0415: || testChar == '#') {
0416: break;
0417: }
0418: index++;
0419: }
0420:
0421: // if we found authority, parse it out, otherwise we set the
0422: // host to empty string
0423: if (index > startPos) {
0424: initializeAuthority(uriSpec.substring(startPos, index));
0425: } else {
0426: m_host = "";
0427: }
0428: }
0429:
0430: initializePath(uriSpec.substring(index));
0431:
0432: // Resolve relative URI to base URI - see RFC 2396 Section 5.2
0433: // In some cases, it might make more sense to throw an exception
0434: // (when scheme is specified is the string spec and the base URI
0435: // is also specified, for example), but we're just following the
0436: // RFC specifications
0437: if (p_base != null) {
0438:
0439: // check to see if this is the current doc - RFC 2396 5.2 #2
0440: // note that this is slightly different from the RFC spec in that
0441: // we don't include the check for query string being null
0442: // - this handles cases where the urispec is just a query
0443: // string or a fragment (e.g. "?y" or "#s") -
0444: // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
0445: // identified this as a bug in the RFC
0446: if (m_path.length() == 0 && m_scheme == null
0447: && m_host == null) {
0448: m_scheme = p_base.getScheme();
0449: m_userinfo = p_base.getUserinfo();
0450: m_host = p_base.getHost();
0451: m_port = p_base.getPort();
0452: m_path = p_base.getPath();
0453:
0454: if (m_queryString == null) {
0455: m_queryString = p_base.getQueryString();
0456: }
0457: return;
0458: }
0459:
0460: // check for scheme - RFC 2396 5.2 #3
0461: // if we found a scheme, it means absolute URI, so we're done
0462: if (m_scheme == null) {
0463: m_scheme = p_base.getScheme();
0464: } else {
0465: return;
0466: }
0467:
0468: // check for authority - RFC 2396 5.2 #4
0469: // if we found a host, then we've got a network path, so we're done
0470: if (m_host == null) {
0471: m_userinfo = p_base.getUserinfo();
0472: m_host = p_base.getHost();
0473: m_port = p_base.getPort();
0474: } else {
0475: return;
0476: }
0477:
0478: // check for absolute path - RFC 2396 5.2 #5
0479: if (m_path.length() > 0 && m_path.startsWith("/")) {
0480: return;
0481: }
0482:
0483: // if we get to this point, we need to resolve relative path
0484: // RFC 2396 5.2 #6
0485: String path = new String();
0486: String basePath = p_base.getPath();
0487:
0488: // 6a - get all but the last segment of the base URI path
0489: if (basePath != null) {
0490: int lastSlash = basePath.lastIndexOf('/');
0491: if (lastSlash != -1) {
0492: path = basePath.substring(0, lastSlash + 1);
0493: }
0494: }
0495:
0496: // 6b - append the relative URI path
0497: path = path.concat(m_path);
0498:
0499: // 6c - remove all "./" where "." is a complete path segment
0500: index = -1;
0501: while ((index = path.indexOf("/./")) != -1) {
0502: path = path.substring(0, index + 1).concat(
0503: path.substring(index + 3));
0504: }
0505:
0506: // 6d - remove "." if path ends with "." as a complete path segment
0507: if (path.endsWith("/.")) {
0508: path = path.substring(0, path.length() - 1);
0509: }
0510:
0511: // 6e - remove all "<segment>/../" where "<segment>" is a complete
0512: // path segment not equal to ".."
0513: index = 1;
0514: int segIndex = -1;
0515: String tempString = null;
0516:
0517: while ((index = path.indexOf("/../", index)) > 0) {
0518: tempString = path.substring(0, path.indexOf("/../"));
0519: segIndex = tempString.lastIndexOf('/');
0520: if (segIndex != -1) {
0521: if (!tempString.substring(segIndex).equals("..")) {
0522: path = path.substring(0, segIndex + 1).concat(
0523: path.substring(index + 4));
0524: index = segIndex;
0525: } else
0526: index += 4;
0527: } else
0528: index += 4;
0529: }
0530:
0531: // 6f - remove ending "<segment>/.." where "<segment>" is a
0532: // complete path segment
0533: if (path.endsWith("/..")) {
0534: tempString = path.substring(0, path.length() - 3);
0535: segIndex = tempString.lastIndexOf('/');
0536: if (segIndex != -1) {
0537: path = path.substring(0, segIndex + 1);
0538: }
0539: }
0540: m_path = path;
0541: }
0542: }
0543:
0544: /**
0545: * Initialize the scheme for this URI from a URI string spec.
0546: *
0547: * @param p_uriSpec the URI specification (cannot be null)
0548: *
0549: * @exception MalformedURIException if URI does not have a conformant
0550: * scheme
0551: */
0552: private void initializeScheme(String p_uriSpec)
0553: throws MalformedURIException {
0554: int uriSpecLen = p_uriSpec.length();
0555: int index = 0;
0556: String scheme = null;
0557: char testChar = '\0';
0558:
0559: while (index < uriSpecLen) {
0560: testChar = p_uriSpec.charAt(index);
0561: if (testChar == ':' || testChar == '/' || testChar == '?'
0562: || testChar == '#') {
0563: break;
0564: }
0565: index++;
0566: }
0567: scheme = p_uriSpec.substring(0, index);
0568:
0569: if (scheme.length() == 0) {
0570: throw new MalformedURIException("No scheme found in URI.");
0571: } else {
0572: setScheme(scheme);
0573: }
0574: }
0575:
0576: /**
0577: * Initialize the authority (userinfo, host and port) for this
0578: * URI from a URI string spec.
0579: *
0580: * @param p_uriSpec the URI specification (cannot be null)
0581: *
0582: * @exception MalformedURIException if p_uriSpec violates syntax rules
0583: */
0584: private void initializeAuthority(String p_uriSpec)
0585: throws MalformedURIException {
0586: int index = 0;
0587: int start = 0;
0588: int end = p_uriSpec.length();
0589: char testChar = '\0';
0590: String userinfo = null;
0591:
0592: // userinfo is everything up @
0593: if (p_uriSpec.indexOf('@', start) != -1) {
0594: while (index < end) {
0595: testChar = p_uriSpec.charAt(index);
0596: if (testChar == '@') {
0597: break;
0598: }
0599: index++;
0600: }
0601: userinfo = p_uriSpec.substring(start, index);
0602: index++;
0603: }
0604:
0605: // host is everything up to ':'
0606: String host = null;
0607: start = index;
0608: while (index < end) {
0609: testChar = p_uriSpec.charAt(index);
0610: if (testChar == ':') {
0611: break;
0612: }
0613: index++;
0614: }
0615: host = p_uriSpec.substring(start, index);
0616: int port = -1;
0617: if (host.length() > 0) {
0618: // port
0619: if (testChar == ':') {
0620: index++;
0621: start = index;
0622: while (index < end) {
0623: index++;
0624: }
0625: String portStr = p_uriSpec.substring(start, index);
0626: if (portStr.length() > 0) {
0627: for (int i = 0; i < portStr.length(); i++) {
0628: if (!isDigit(portStr.charAt(i))) {
0629: throw new MalformedURIException(
0630: portStr
0631: + " is invalid. Port should only contain digits!");
0632: }
0633: }
0634: try {
0635: port = Integer.parseInt(portStr);
0636: } catch (NumberFormatException nfe) {
0637: // can't happen
0638: }
0639: }
0640: }
0641: }
0642: setHost(host);
0643: setPort(port);
0644: setUserinfo(userinfo);
0645: }
0646:
0647: /**
0648: * Initialize the path for this URI from a URI string spec.
0649: *
0650: * @param p_uriSpec the URI specification (cannot be null)
0651: *
0652: * @exception MalformedURIException if p_uriSpec violates syntax rules
0653: */
0654: private void initializePath(String p_uriSpec)
0655: throws MalformedURIException {
0656: if (p_uriSpec == null) {
0657: throw new MalformedURIException(
0658: "Cannot initialize path from null string!");
0659: }
0660:
0661: int index = 0;
0662: int start = 0;
0663: int end = p_uriSpec.length();
0664: char testChar = '\0';
0665:
0666: // path - everything up to query string or fragment
0667: while (index < end) {
0668: testChar = p_uriSpec.charAt(index);
0669: if (testChar == '?' || testChar == '#') {
0670: break;
0671: }
0672: // check for valid escape sequence
0673: if (testChar == '%') {
0674: if (index + 2 >= end
0675: || !isHex(p_uriSpec.charAt(index + 1))
0676: || !isHex(p_uriSpec.charAt(index + 2))) {
0677: throw new MalformedURIException(
0678: "Path contains invalid escape sequence!");
0679: }
0680: } else if (!isReservedCharacter(testChar)
0681: && !isUnreservedCharacter(testChar)) {
0682: throw new MalformedURIException(
0683: "Path contains invalid character: " + testChar);
0684: }
0685: index++;
0686: }
0687: m_path = p_uriSpec.substring(start, index);
0688:
0689: // query - starts with ? and up to fragment or end
0690: if (testChar == '?') {
0691: index++;
0692: start = index;
0693: while (index < end) {
0694: testChar = p_uriSpec.charAt(index);
0695: if (testChar == '#') {
0696: break;
0697: }
0698: if (testChar == '%') {
0699: if (index + 2 >= end
0700: || !isHex(p_uriSpec.charAt(index + 1))
0701: || !isHex(p_uriSpec.charAt(index + 2))) {
0702: throw new MalformedURIException(
0703: "Query string contains invalid escape sequence!");
0704: }
0705: } else if (!isReservedCharacter(testChar)
0706: && !isUnreservedCharacter(testChar)) {
0707: throw new MalformedURIException(
0708: "Query string contains invalid character:"
0709: + testChar);
0710: }
0711: index++;
0712: }
0713: m_queryString = p_uriSpec.substring(start, index);
0714: }
0715:
0716: // fragment - starts with #
0717: if (testChar == '#') {
0718: index++;
0719: start = index;
0720: while (index < end) {
0721: testChar = p_uriSpec.charAt(index);
0722:
0723: if (testChar == '%') {
0724: if (index + 2 >= end
0725: || !isHex(p_uriSpec.charAt(index + 1))
0726: || !isHex(p_uriSpec.charAt(index + 2))) {
0727: throw new MalformedURIException(
0728: "Fragment contains invalid escape sequence!");
0729: }
0730: } else if (!isReservedCharacter(testChar)
0731: && !isUnreservedCharacter(testChar)) {
0732: throw new MalformedURIException(
0733: "Fragment contains invalid character:"
0734: + testChar);
0735: }
0736: index++;
0737: }
0738: m_fragment = p_uriSpec.substring(start, index);
0739: }
0740: }
0741:
0742: /**
0743: * Get the scheme for this URI.
0744: *
0745: * @return the scheme for this URI
0746: */
0747: public String getScheme() {
0748: return m_scheme;
0749: }
0750:
0751: /**
0752: * Get the scheme-specific part for this URI (everything following the
0753: * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
0754: *
0755: * @return the scheme-specific part for this URI
0756: */
0757: public String getSchemeSpecificPart() {
0758: StringBuffer schemespec = new StringBuffer();
0759:
0760: if (m_userinfo != null || m_host != null || m_port != -1) {
0761: schemespec.append("//");
0762: }
0763:
0764: if (m_userinfo != null) {
0765: schemespec.append(m_userinfo);
0766: schemespec.append('@');
0767: }
0768:
0769: if (m_host != null) {
0770: schemespec.append(m_host);
0771: }
0772:
0773: if (m_port != -1) {
0774: schemespec.append(':');
0775: schemespec.append(m_port);
0776: }
0777:
0778: if (m_path != null) {
0779: schemespec.append((m_path));
0780: }
0781:
0782: if (m_queryString != null) {
0783: schemespec.append('?');
0784: schemespec.append(m_queryString);
0785: }
0786:
0787: if (m_fragment != null) {
0788: schemespec.append('#');
0789: schemespec.append(m_fragment);
0790: }
0791:
0792: return schemespec.toString();
0793: }
0794:
0795: /**
0796: * Get the userinfo for this URI.
0797: *
0798: * @return the userinfo for this URI (null if not specified).
0799: */
0800: public String getUserinfo() {
0801: return m_userinfo;
0802: }
0803:
0804: /**
0805: * Get the host for this URI.
0806: *
0807: * @return the host for this URI (null if not specified).
0808: */
0809: public String getHost() {
0810: return m_host;
0811: }
0812:
0813: /**
0814: * Get the port for this URI.
0815: *
0816: * @return the port for this URI (-1 if not specified).
0817: */
0818: public int getPort() {
0819: return m_port;
0820: }
0821:
0822: /**
0823: * Get the path for this URI (optionally with the query string and
0824: * fragment).
0825: *
0826: * @param p_includeQueryString if true (and query string is not null),
0827: * then a "?" followed by the query string
0828: * will be appended
0829: * @param p_includeFragment if true (and fragment is not null),
0830: * then a "#" followed by the fragment
0831: * will be appended
0832: *
0833: * @return the path for this URI possibly including the query string
0834: * and fragment
0835: */
0836: public String getPath(boolean p_includeQueryString,
0837: boolean p_includeFragment) {
0838: StringBuffer pathString = new StringBuffer(m_path);
0839:
0840: if (p_includeQueryString && m_queryString != null) {
0841: pathString.append('?');
0842: pathString.append(m_queryString);
0843: }
0844:
0845: if (p_includeFragment && m_fragment != null) {
0846: pathString.append('#');
0847: pathString.append(m_fragment);
0848: }
0849: return pathString.toString();
0850: }
0851:
0852: /**
0853: * Get the path for this URI. Note that the value returned is the path
0854: * only and does not include the query string or fragment.
0855: *
0856: * @return the path for this URI.
0857: */
0858: public String getPath() {
0859: return m_path;
0860: }
0861:
0862: /**
0863: * Get the query string for this URI.
0864: *
0865: * @return the query string for this URI. Null is returned if there
0866: * was no "?" in the URI spec, empty string if there was a
0867: * "?" but no query string following it.
0868: */
0869: public String getQueryString() {
0870: return m_queryString;
0871: }
0872:
0873: /**
0874: * Get the fragment for this URI.
0875: *
0876: * @return the fragment for this URI. Null is returned if there
0877: * was no "#" in the URI spec, empty string if there was a
0878: * "#" but no fragment following it.
0879: */
0880: public String getFragment() {
0881: return m_fragment;
0882: }
0883:
0884: /**
0885: * Set the scheme for this URI. The scheme is converted to lowercase
0886: * before it is set.
0887: *
0888: * @param p_scheme the scheme for this URI (cannot be null)
0889: *
0890: * @exception MalformedURIException if p_scheme is not a conformant
0891: * scheme name
0892: */
0893: public void setScheme(String p_scheme) throws MalformedURIException {
0894: if (p_scheme == null) {
0895: throw new MalformedURIException(
0896: "Cannot set scheme from null string!");
0897: }
0898: if (!isConformantSchemeName(p_scheme)) {
0899: throw new MalformedURIException(
0900: "The scheme is not conformant.");
0901: }
0902:
0903: m_scheme = p_scheme.toLowerCase();
0904: }
0905:
0906: /**
0907: * Set the userinfo for this URI. If a non-null value is passed in and
0908: * the host value is null, then an exception is thrown.
0909: *
0910: * @param p_userinfo the userinfo for this URI
0911: *
0912: * @exception MalformedURIException if p_userinfo contains invalid
0913: * characters
0914: */
0915: public void setUserinfo(String p_userinfo)
0916: throws MalformedURIException {
0917: if (p_userinfo == null) {
0918: m_userinfo = null;
0919: } else {
0920: if (m_host == null) {
0921: throw new MalformedURIException(
0922: "Userinfo cannot be set when host is null!");
0923: }
0924:
0925: // userinfo can contain alphanumerics, mark characters, escaped
0926: // and ';',':','&','=','+','$',','
0927: int index = 0;
0928: int end = p_userinfo.length();
0929: char testChar = '\0';
0930: while (index < end) {
0931: testChar = p_userinfo.charAt(index);
0932: if (testChar == '%') {
0933: if (index + 2 >= end
0934: || !isHex(p_userinfo.charAt(index + 1))
0935: || !isHex(p_userinfo.charAt(index + 2))) {
0936: throw new MalformedURIException(
0937: "Userinfo contains invalid escape sequence!");
0938: }
0939: } else if (!isUnreservedCharacter(testChar)
0940: && USERINFO_CHARACTERS.indexOf(testChar) == -1) {
0941: throw new MalformedURIException(
0942: "Userinfo contains invalid character:"
0943: + testChar);
0944: }
0945: index++;
0946: }
0947: }
0948: m_userinfo = p_userinfo;
0949: }
0950:
0951: /**
0952: * Set the host for this URI. If null is passed in, the userinfo
0953: * field is also set to null and the port is set to -1.
0954: *
0955: * @param p_host the host for this URI
0956: *
0957: * @exception MalformedURIException if p_host is not a valid IP
0958: * address or DNS hostname.
0959: */
0960: public void setHost(String p_host) throws MalformedURIException {
0961: if (p_host == null || p_host.trim().length() == 0) {
0962: m_host = p_host;
0963: m_userinfo = null;
0964: m_port = -1;
0965: } else if (!isWellFormedAddress(p_host)) {
0966: throw new MalformedURIException(
0967: "Host is not a well formed address!");
0968: }
0969: m_host = p_host;
0970: }
0971:
0972: /**
0973: * Set the port for this URI. -1 is used to indicate that the port is
0974: * not specified, otherwise valid port numbers are between 0 and 65535.
0975: * If a valid port number is passed in and the host field is null,
0976: * an exception is thrown.
0977: *
0978: * @param p_port the port number for this URI
0979: *
0980: * @exception MalformedURIException if p_port is not -1 and not a
0981: * valid port number
0982: */
0983: public void setPort(int p_port) throws MalformedURIException {
0984: if (p_port >= 0 && p_port <= 65535) {
0985: if (m_host == null) {
0986: throw new MalformedURIException(
0987: "Port cannot be set when host is null!");
0988: }
0989: } else if (p_port != -1) {
0990: throw new MalformedURIException("Invalid port number!");
0991: }
0992: m_port = p_port;
0993: }
0994:
0995: /**
0996: * Set the path for this URI. If the supplied path is null, then the
0997: * query string and fragment are set to null as well. If the supplied
0998: * path includes a query string and/or fragment, these fields will be
0999: * parsed and set as well. Note that, for URIs following the "generic
1000: * URI" syntax, the path specified should start with a slash.
1001: * For URIs that do not follow the generic URI syntax, this method
1002: * sets the scheme-specific part.
1003: *
1004: * @param p_path the path for this URI (may be null)
1005: *
1006: * @exception MalformedURIException if p_path contains invalid
1007: * characters
1008: */
1009: public void setPath(String p_path) throws MalformedURIException {
1010: if (p_path == null) {
1011: m_path = null;
1012: m_queryString = null;
1013: m_fragment = null;
1014: } else {
1015: initializePath(p_path);
1016: }
1017: }
1018:
1019: /**
1020: * Append to the end of the path of this URI. If the current path does
1021: * not end in a slash and the path to be appended does not begin with
1022: * a slash, a slash will be appended to the current path before the
1023: * new segment is added. Also, if the current path ends in a slash
1024: * and the new segment begins with a slash, the extra slash will be
1025: * removed before the new segment is appended.
1026: *
1027: * @param p_addToPath the new segment to be added to the current path
1028: *
1029: * @exception MalformedURIException if p_addToPath contains syntax
1030: * errors
1031: */
1032: public void appendPath(String p_addToPath)
1033: throws MalformedURIException {
1034: if (p_addToPath == null || p_addToPath.trim().length() == 0) {
1035: return;
1036: }
1037:
1038: if (!isURIString(p_addToPath)) {
1039: throw new MalformedURIException(
1040: "Path contains invalid character!");
1041: }
1042:
1043: if (m_path == null || m_path.trim().length() == 0) {
1044: if (p_addToPath.startsWith("/")) {
1045: m_path = p_addToPath;
1046: } else {
1047: m_path = "/" + p_addToPath;
1048: }
1049: } else if (m_path.endsWith("/")) {
1050: if (p_addToPath.startsWith("/")) {
1051: m_path = m_path.concat(p_addToPath.substring(1));
1052: } else {
1053: m_path = m_path.concat(p_addToPath);
1054: }
1055: } else {
1056: if (p_addToPath.startsWith("/")) {
1057: m_path = m_path.concat(p_addToPath);
1058: } else {
1059: m_path = m_path.concat("/" + p_addToPath);
1060: }
1061: }
1062: }
1063:
1064: /**
1065: * Set the query string for this URI. A non-null value is valid only
1066: * if this is an URI conforming to the generic URI syntax and
1067: * the path value is not null.
1068: *
1069: * @param p_queryString the query string for this URI
1070: *
1071: * @exception MalformedURIException if p_queryString is not null and this
1072: * URI does not conform to the generic
1073: * URI syntax or if the path is null
1074: */
1075: public void setQueryString(String p_queryString)
1076: throws MalformedURIException {
1077: if (p_queryString == null) {
1078: m_queryString = null;
1079: } else if (!isGenericURI()) {
1080: throw new MalformedURIException(
1081: "Query string can only be set for a generic URI!");
1082: } else if (getPath() == null) {
1083: throw new MalformedURIException(
1084: "Query string cannot be set when path is null!");
1085: } else if (!isURIString(p_queryString)) {
1086: throw new MalformedURIException(
1087: "Query string contains invalid character!");
1088: } else {
1089: m_queryString = p_queryString;
1090: }
1091: }
1092:
1093: /**
1094: * Set the fragment for this URI. A non-null value is valid only
1095: * if this is a URI conforming to the generic URI syntax and
1096: * the path value is not null.
1097: *
1098: * @param p_fragment the fragment for this URI
1099: *
1100: * @exception MalformedURIException if p_fragment is not null and this
1101: * URI does not conform to the generic
1102: * URI syntax or if the path is null
1103: */
1104: public void setFragment(String p_fragment)
1105: throws MalformedURIException {
1106: if (p_fragment == null) {
1107: m_fragment = null;
1108: } else if (!isGenericURI()) {
1109: throw new MalformedURIException(
1110: "Fragment can only be set for a generic URI!");
1111: } else if (getPath() == null) {
1112: throw new MalformedURIException(
1113: "Fragment cannot be set when path is null!");
1114: } else if (!isURIString(p_fragment)) {
1115: throw new MalformedURIException(
1116: "Fragment contains invalid character!");
1117: } else {
1118: m_fragment = p_fragment;
1119: }
1120: }
1121:
1122: /**
1123: * Determines if the passed-in Object is equivalent to this URI.
1124: *
1125: * @param p_test the Object to test for equality.
1126: *
1127: * @return true if p_test is a URI with all values equal to this
1128: * URI, false otherwise
1129: */
1130: public boolean equals(Object p_test) {
1131: if (p_test instanceof URI) {
1132: URI testURI = (URI) p_test;
1133: if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null
1134: && testURI.m_scheme != null && m_scheme
1135: .equals(testURI.m_scheme)))
1136: && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null
1137: && testURI.m_userinfo != null && m_userinfo
1138: .equals(testURI.m_userinfo)))
1139: && ((m_host == null && testURI.m_host == null) || (m_host != null
1140: && testURI.m_host != null && m_host
1141: .equals(testURI.m_host)))
1142: && m_port == testURI.m_port
1143: && ((m_path == null && testURI.m_path == null) || (m_path != null
1144: && testURI.m_path != null && m_path
1145: .equals(testURI.m_path)))
1146: && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null
1147: && testURI.m_queryString != null && m_queryString
1148: .equals(testURI.m_queryString)))
1149: && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null
1150: && testURI.m_fragment != null && m_fragment
1151: .equals(testURI.m_fragment)))) {
1152: return true;
1153: }
1154: }
1155: return false;
1156: }
1157:
1158: /**
1159: * Get the URI as a string specification. See RFC 2396 Section 5.2.
1160: *
1161: * @return the URI string specification
1162: */
1163: public String toString() {
1164: StringBuffer uriSpecString = new StringBuffer();
1165:
1166: if (m_scheme != null) {
1167: uriSpecString.append(m_scheme);
1168: uriSpecString.append(':');
1169: }
1170: uriSpecString.append(getSchemeSpecificPart());
1171: return uriSpecString.toString();
1172: }
1173:
1174: /**
1175: * Get the indicator as to whether this URI uses the "generic URI"
1176: * syntax.
1177: *
1178: * @return true if this URI uses the "generic URI" syntax, false
1179: * otherwise
1180: */
1181: public boolean isGenericURI() {
1182: // presence of the host (whether valid or empty) means
1183: // double-slashes which means generic uri
1184: return (m_host != null);
1185: }
1186:
1187: /**
1188: * Determine whether a scheme conforms to the rules for a scheme name.
1189: * A scheme is conformant if it starts with an alphanumeric, and
1190: * contains only alphanumerics, '+','-' and '.'.
1191: *
1192: * @return true if the scheme is conformant, false otherwise
1193: */
1194: public static boolean isConformantSchemeName(String p_scheme) {
1195: if (p_scheme == null || p_scheme.trim().length() == 0) {
1196: return false;
1197: }
1198:
1199: if (!isAlpha(p_scheme.charAt(0))) {
1200: return false;
1201: }
1202:
1203: char testChar;
1204: for (int i = 1; i < p_scheme.length(); i++) {
1205: testChar = p_scheme.charAt(i);
1206: if (!isAlphanum(testChar)
1207: && SCHEME_CHARACTERS.indexOf(testChar) == -1) {
1208: return false;
1209: }
1210: }
1211:
1212: return true;
1213: }
1214:
1215: /**
1216: * Determine whether a string is syntactically capable of representing
1217: * a valid IPv4 address or the domain name of a network host. A valid
1218: * IPv4 address consists of four decimal digit groups separated by a
1219: * '.'. A hostname consists of domain labels (each of which must
1220: * begin and end with an alphanumeric but may contain '-') separated
1221: & by a '.'. See RFC 2396 Section 3.2.2.
1222: *
1223: * @return true if the string is a syntactically valid IPv4 address
1224: * or hostname
1225: */
1226: public static boolean isWellFormedAddress(String p_address) {
1227: if (p_address == null) {
1228: return false;
1229: }
1230:
1231: String address = p_address.trim();
1232: int addrLength = address.length();
1233: if (addrLength == 0 || addrLength > 255) {
1234: return false;
1235: }
1236:
1237: if (address.startsWith(".") || address.startsWith("-")) {
1238: return false;
1239: }
1240:
1241: // rightmost domain label starting with digit indicates IP address
1242: // since top level domain label can only start with an alpha
1243: // see RFC 2396 Section 3.2.2
1244: int index = address.lastIndexOf('.');
1245: if (address.endsWith(".")) {
1246: index = address.substring(0, index).lastIndexOf('.');
1247: }
1248:
1249: if (index + 1 < addrLength
1250: && isDigit(p_address.charAt(index + 1))) {
1251: char testChar;
1252: int numDots = 0;
1253:
1254: // make sure that 1) we see only digits and dot separators, 2) that
1255: // any dot separator is preceded and followed by a digit and
1256: // 3) that we find 3 dots
1257: for (int i = 0; i < addrLength; i++) {
1258: testChar = address.charAt(i);
1259: if (testChar == '.') {
1260: if (!isDigit(address.charAt(i - 1))
1261: || (i + 1 < addrLength && !isDigit(address
1262: .charAt(i + 1)))) {
1263: return false;
1264: }
1265: numDots++;
1266: } else if (!isDigit(testChar)) {
1267: return false;
1268: }
1269: }
1270: if (numDots != 3) {
1271: return false;
1272: }
1273: } else {
1274: // domain labels can contain alphanumerics and '-"
1275: // but must start and end with an alphanumeric
1276: char testChar;
1277:
1278: for (int i = 0; i < addrLength; i++) {
1279: testChar = address.charAt(i);
1280: if (testChar == '.') {
1281: if (!isAlphanum(address.charAt(i - 1))) {
1282: return false;
1283: }
1284: if (i + 1 < addrLength
1285: && !isAlphanum(address.charAt(i + 1))) {
1286: return false;
1287: }
1288: } else if (!isAlphanum(testChar) && testChar != '-') {
1289: return false;
1290: }
1291: }
1292: }
1293: return true;
1294: }
1295:
1296: /**
1297: * Determine whether a char is a digit.
1298: *
1299: * @return true if the char is betweeen '0' and '9', false otherwise
1300: */
1301: private static boolean isDigit(char p_char) {
1302: return p_char >= '0' && p_char <= '9';
1303: }
1304:
1305: /**
1306: * Determine whether a character is a hexadecimal character.
1307: *
1308: * @return true if the char is betweeen '0' and '9', 'a' and 'f'
1309: * or 'A' and 'F', false otherwise
1310: */
1311: private static boolean isHex(char p_char) {
1312: return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f') || (p_char >= 'A' && p_char <= 'F'));
1313: }
1314:
1315: /**
1316: * Determine whether a char is an alphabetic character: a-z or A-Z
1317: *
1318: * @return true if the char is alphabetic, false otherwise
1319: */
1320: private static boolean isAlpha(char p_char) {
1321: return ((p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z'));
1322: }
1323:
1324: /**
1325: * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
1326: *
1327: * @return true if the char is alphanumeric, false otherwise
1328: */
1329: private static boolean isAlphanum(char p_char) {
1330: return (isAlpha(p_char) || isDigit(p_char));
1331: }
1332:
1333: /**
1334: * Determine whether a character is a reserved character:
1335: * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
1336: *
1337: * @return true if the string contains any reserved characters
1338: */
1339: private static boolean isReservedCharacter(char p_char) {
1340: return RESERVED_CHARACTERS.indexOf(p_char) != -1;
1341: }
1342:
1343: /**
1344: * Determine whether a char is an unreserved character.
1345: *
1346: * @return true if the char is unreserved, false otherwise
1347: */
1348: private static boolean isUnreservedCharacter(char p_char) {
1349: return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1);
1350: }
1351:
1352: /**
1353: * Determine whether a given string contains only URI characters (also
1354: * called "uric" in RFC 2396). uric consist of all reserved
1355: * characters, unreserved characters and escaped characters.
1356: *
1357: * @return true if the string is comprised of uric, false otherwise
1358: */
1359: private static boolean isURIString(String p_uric) {
1360: if (p_uric == null) {
1361: return false;
1362: }
1363: int end = p_uric.length();
1364: char testChar = '\0';
1365: for (int i = 0; i < end; i++) {
1366: testChar = p_uric.charAt(i);
1367: if (testChar == '%') {
1368: if (i + 2 >= end || !isHex(p_uric.charAt(i + 1))
1369: || !isHex(p_uric.charAt(i + 2))) {
1370: return false;
1371: } else {
1372: i += 2;
1373: continue;
1374: }
1375: }
1376: if (isReservedCharacter(testChar)
1377: || isUnreservedCharacter(testChar)) {
1378: continue;
1379: } else {
1380: return false;
1381: }
1382: }
1383: return true;
1384: }
1385: }
|