0001: /*
0002: * $Id: JaxmURI.java,v 1.4 2006/01/27 12:49:51 vj135062 Exp $
0003: * $Revision: 1.4 $
0004: * $Date: 2006/01/27 12:49:51 $
0005: */
0006:
0007: /*
0008: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
0009: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0010: *
0011: * This code is free software; you can redistribute it and/or modify it
0012: * under the terms of the GNU General Public License version 2 only, as
0013: * published by the Free Software Foundation. Sun designates this
0014: * particular file as subject to the "Classpath" exception as provided
0015: * by Sun in the LICENSE file that accompanied this code.
0016: *
0017: * This code is distributed in the hope that it will be useful, but WITHOUT
0018: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0019: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0020: * version 2 for more details (a copy is included in the LICENSE file that
0021: * accompanied this code).
0022: *
0023: * You should have received a copy of the GNU General Public License version
0024: * 2 along with this work; if not, write to the Free Software Foundation,
0025: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0026: *
0027: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0028: * CA 95054 USA or visit www.sun.com if you need additional information or
0029: * have any questions.
0030: */
0031: package com.sun.xml.internal.messaging.saaj.util;
0032:
0033: // Imported from: org.apache.xerces.util
0034: // Needed to work around differences in JDK1.2 and 1.3 and deal with userInfo
0035:
0036: import java.io.IOException;
0037: import java.io.Serializable;
0038:
0039: /**********************************************************************
0040: * A class to represent a Uniform Resource Identifier (URI). This class
0041: * is designed to handle the parsing of URIs and provide access to
0042: * the various components (scheme, host, port, userinfo, path, query
0043: * string and fragment) that may constitute a URI.
0044: * <p>
0045: * Parsing of a URI specification is done according to the URI
0046: * syntax described in RFC 2396
0047: * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
0048: * of a scheme, followed by a colon (':'), followed by a scheme-specific
0049: * part. For URIs that follow the "generic URI" syntax, the scheme-
0050: * specific part begins with two slashes ("//") and may be followed
0051: * by an authority segment (comprised of user information, host, and
0052: * port), path segment, query segment and fragment. Note that RFC 2396
0053: * no longer specifies the use of the parameters segment and excludes
0054: * the "user:password" syntax as part of the authority segment. If
0055: * "user:password" appears in a URI, the entire user/password string
0056: * is stored as userinfo.
0057: * <p>
0058: * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
0059: * the entire scheme-specific part is treated as the "path" portion
0060: * of the URI.
0061: * <p>
0062: * Note that, unlike the java.net.URL class, this class does not provide
0063: * any built-in network access functionality nor does it provide any
0064: * scheme-specific functionality (for example, it does not know a
0065: * default port for a specific scheme). Rather, it only knows the
0066: * grammar and basic set of operations that can be applied to a URI.
0067: *
0068: * @version $Id: JaxmURI.java,v 1.4 2006/01/27 12:49:51 vj135062 Exp $
0069: *
0070: **********************************************************************/
0071: public class JaxmURI implements Serializable {
0072:
0073: /*******************************************************************
0074: * MalformedURIExceptions are thrown in the process of building a URI
0075: * or setting fields on a URI when an operation would result in an
0076: * invalid URI specification.
0077: *
0078: ********************************************************************/
0079: public static class MalformedURIException extends IOException {
0080:
0081: /******************************************************************
0082: * Constructs a <code>MalformedURIException</code> with no specified
0083: * detail message.
0084: ******************************************************************/
0085: public MalformedURIException() {
0086: super ();
0087: }
0088:
0089: /*****************************************************************
0090: * Constructs a <code>MalformedURIException</code> with the
0091: * specified detail message.
0092: *
0093: * @param p_msg the detail message.
0094: ******************************************************************/
0095: public MalformedURIException(String p_msg) {
0096: super (p_msg);
0097: }
0098: }
0099:
0100: /** reserved characters */
0101: private static final String RESERVED_CHARACTERS = ";/?:@&=+$,";
0102:
0103: /** URI punctuation mark characters - these, combined with
0104: alphanumerics, constitute the "unreserved" characters */
0105: private static final String MARK_CHARACTERS = "-_.!~*'() ";
0106:
0107: /** scheme can be composed of alphanumerics and these characters */
0108: private static final String SCHEME_CHARACTERS = "+-.";
0109:
0110: /** userinfo can be composed of unreserved, escaped and these
0111: characters */
0112: private static final String USERINFO_CHARACTERS = ";:&=+$,";
0113:
0114: /** Stores the scheme (usually the protocol) for this URI. */
0115: private String m_scheme = null;
0116:
0117: /** If specified, stores the userinfo for this URI; otherwise null */
0118: private String m_userinfo = null;
0119:
0120: /** If specified, stores the host for this URI; otherwise null */
0121: private String m_host = null;
0122:
0123: /** If specified, stores the port for this URI; otherwise -1 */
0124: private int m_port = -1;
0125:
0126: /** If specified, stores the path for this URI; otherwise null */
0127: private String m_path = null;
0128:
0129: /** If specified, stores the query string for this URI; otherwise
0130: null. */
0131: private String m_queryString = null;
0132:
0133: /** If specified, stores the fragment for this URI; otherwise null */
0134: private String m_fragment = null;
0135:
0136: private static boolean DEBUG = false;
0137:
/**
 * Creates an empty URI. No component is set: the port is -1 and every
 * other field is null.
 */
public JaxmURI() {
    super();
}
0143:
/**
 * Creates a URI as a field-by-field copy of another URI.
 *
 * @param p_other the URI whose components are copied (cannot be null)
 */
public JaxmURI(JaxmURI p_other) {
    this.initialize(p_other);
}
0153:
/**
 * Creates a URI by parsing a URI specification string, without a base
 * URI. When the specification uses the "generic URI" syntax (two
 * slashes after the first colon) the scheme, userinfo, host, port,
 * path, query string and fragment are set as found; otherwise only the
 * scheme and the scheme-specific part (stored as the path) are set.
 *
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty)
 *
 * @exception MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public JaxmURI(String p_uriSpec) throws MalformedURIException {
    // the cast selects the (JaxmURI, String) constructor
    this((JaxmURI) null, p_uriSpec);
}
0172:
/**
 * Creates a URI by resolving a URI specification string, which may be
 * a relative URI, against a base URI.
 *
 * @param p_base the base URI (cannot be null if p_uriSpec is null or
 *               empty)
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty if p_base is null)
 *
 * @exception MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public JaxmURI(JaxmURI p_base, String p_uriSpec)
        throws MalformedURIException {
    this.initialize(p_base, p_uriSpec);
}
0189:
0190: /**
0191: * Construct a new URI that does not follow the generic URI syntax.
0192: * Only the scheme and scheme-specific part (stored as the path) are
0193: * initialized.
0194: *
0195: * @param p_scheme the URI scheme (cannot be null or empty)
0196: * @param p_schemeSpecificPart the scheme-specific part (cannot be
0197: * null or empty)
0198: *
0199: * @exception MalformedURIException if p_scheme violates any
0200: * syntax rules
0201: */
0202: public JaxmURI(String p_scheme, String p_schemeSpecificPart)
0203: throws MalformedURIException {
0204: if (p_scheme == null || p_scheme.trim().length() == 0) {
0205: throw new MalformedURIException(
0206: "Cannot construct URI with null/empty scheme!");
0207: }
0208: if (p_schemeSpecificPart == null
0209: || p_schemeSpecificPart.trim().length() == 0) {
0210: throw new MalformedURIException(
0211: "Cannot construct URI with null/empty scheme-specific part!");
0212: }
0213: setScheme(p_scheme);
0214: setPath(p_schemeSpecificPart);
0215: }
0216:
/**
 * Creates a generic-syntax URI from its parts, without userinfo and
 * without a port. Delegates to the seven-argument constructor, passing
 * null for the userinfo and -1 for the port.
 *
 * @param p_scheme the URI scheme (cannot be null or empty)
 * @param p_host the hostname or IPv4 address for the URI
 * @param p_path the URI path - a '?' or '#' inside the path sets the
 *               query string and/or fragment from the path; giving
 *               the same component both in the path and as a
 *               separate argument raises an exception
 * @param p_queryString the URI query string (cannot be specified
 *                      if path is null)
 * @param p_fragment the URI fragment (cannot be specified if path
 *                   is null)
 *
 * @exception MalformedURIException if any of the parameters violates
 *                                  syntax rules or semantic rules
 */
public JaxmURI(String p_scheme, String p_host, String p_path,
        String p_queryString, String p_fragment)
        throws MalformedURIException {
    this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
}
0244:
0245: /**
0246: * Construct a new URI that follows the generic URI syntax from its
0247: * component parts. Each component is validated for syntax and some
0248: * basic semantic checks are performed as well. See the individual
0249: * setter methods for specifics.
0250: *
0251: * @param p_scheme the URI scheme (cannot be null or empty)
0252: * @param p_userinfo the URI userinfo (cannot be specified if host
0253: * is null)
0254: * @param p_host the hostname or IPv4 address for the URI
0255: * @param p_port the URI port (may be -1 for "unspecified"; cannot
0256: * be specified if host is null)
0257: * @param p_path the URI path - if the path contains '?' or '#',
0258: * then the query string and/or fragment will be
0259: * set from the path; however, if the query and
0260: * fragment are specified both in the path and as
0261: * separate parameters, an exception is thrown
0262: * @param p_queryString the URI query string (cannot be specified
0263: * if path is null)
0264: * @param p_fragment the URI fragment (cannot be specified if path
0265: * is null)
0266: *
0267: * @exception MalformedURIException if any of the parameters violates
0268: * syntax rules or semantic rules
0269: */
0270: public JaxmURI(String p_scheme, String p_userinfo, String p_host,
0271: int p_port, String p_path, String p_queryString,
0272: String p_fragment) throws MalformedURIException {
0273: if (p_scheme == null || p_scheme.trim().length() == 0) {
0274: throw new MalformedURIException("Scheme is required!");
0275: }
0276:
0277: if (p_host == null) {
0278: if (p_userinfo != null) {
0279: throw new MalformedURIException(
0280: "Userinfo may not be specified if host is not specified!");
0281: }
0282: if (p_port != -1) {
0283: throw new MalformedURIException(
0284: "Port may not be specified if host is not specified!");
0285: }
0286: }
0287:
0288: if (p_path != null) {
0289: if (p_path.indexOf('?') != -1 && p_queryString != null) {
0290: throw new MalformedURIException(
0291: "Query string cannot be specified in path and query string!");
0292: }
0293:
0294: if (p_path.indexOf('#') != -1 && p_fragment != null) {
0295: throw new MalformedURIException(
0296: "Fragment cannot be specified in both the path and fragment!");
0297: }
0298: }
0299:
0300: setScheme(p_scheme);
0301: setHost(p_host);
0302: setPort(p_port);
0303: setUserinfo(p_userinfo);
0304: setPath(p_path);
0305: setQueryString(p_queryString);
0306: setFragment(p_fragment);
0307: }
0308:
0309: /**
0310: * Initialize all fields of this URI from another URI.
0311: *
0312: * @param p_other the URI to copy (cannot be null)
0313: */
0314: private void initialize(JaxmURI p_other) {
0315: m_scheme = p_other.getScheme();
0316: m_userinfo = p_other.getUserinfo();
0317: m_host = p_other.getHost();
0318: m_port = p_other.getPort();
0319: m_path = p_other.getPath();
0320: m_queryString = p_other.getQueryString();
0321: m_fragment = p_other.getFragment();
0322: }
0323:
0324: /**
0325: * Initializes this URI from a base URI and a URI specification string.
0326: * See RFC 2396 Section 4 and Appendix B for specifications on parsing
0327: * the URI and Section 5 for specifications on resolving relative URIs
0328: * and relative paths.
0329: *
0330: * @param p_base the base URI (may be null if p_uriSpec is an absolute
0331: * URI)
0332: * @param p_uriSpec the URI spec string which may be an absolute or
0333: * relative URI (can only be null/empty if p_base
0334: * is not null)
0335: *
0336: * @exception MalformedURIException if p_base is null and p_uriSpec
0337: * is not an absolute URI or if
0338: * p_uriSpec violates syntax rules
0339: */
0340: private void initialize(JaxmURI p_base, String p_uriSpec)
0341: throws MalformedURIException {
0342: if (p_base == null
0343: && (p_uriSpec == null || p_uriSpec.trim().length() == 0)) {
0344: throw new MalformedURIException(
0345: "Cannot initialize URI with empty parameters.");
0346: }
0347:
0348: // just make a copy of the base if spec is empty
0349: if (p_uriSpec == null || p_uriSpec.trim().length() == 0) {
0350: initialize(p_base);
0351: return;
0352: }
0353:
0354: String uriSpec = p_uriSpec.trim();
0355: int uriSpecLen = uriSpec.length();
0356: int index = 0;
0357:
0358: // Check for scheme, which must be before `/'. Also handle names with
0359: // DOS drive letters ('D:'), so 1-character schemes are not allowed.
0360: int colonIdx = uriSpec.indexOf(':');
0361: int slashIdx = uriSpec.indexOf('/');
0362: if ((colonIdx < 2) || (colonIdx > slashIdx && slashIdx != -1)) {
0363: int fragmentIdx = uriSpec.indexOf('#');
0364: // A standalone base is a valid URI according to spec
0365: if (p_base == null && fragmentIdx != 0) {
0366: throw new MalformedURIException(
0367: "No scheme found in URI.");
0368: }
0369: } else {
0370: initializeScheme(uriSpec);
0371: index = m_scheme.length() + 1;
0372: }
0373:
0374: // two slashes means generic URI syntax, so we get the authority
0375: if (((index + 1) < uriSpecLen)
0376: && (uriSpec.substring(index).startsWith("//"))) {
0377: index += 2;
0378: int startPos = index;
0379:
0380: // get authority - everything up to path, query or fragment
0381: char testChar = '\0';
0382: while (index < uriSpecLen) {
0383: testChar = uriSpec.charAt(index);
0384: if (testChar == '/' || testChar == '?'
0385: || testChar == '#') {
0386: break;
0387: }
0388: index++;
0389: }
0390:
0391: // if we found authority, parse it out, otherwise we set the
0392: // host to empty string
0393: if (index > startPos) {
0394: initializeAuthority(uriSpec.substring(startPos, index));
0395: } else {
0396: m_host = "";
0397: }
0398: }
0399:
0400: initializePath(uriSpec.substring(index));
0401:
0402: // Resolve relative URI to base URI - see RFC 2396 Section 5.2
0403: // In some cases, it might make more sense to throw an exception
0404: // (when scheme is specified is the string spec and the base URI
0405: // is also specified, for example), but we're just following the
0406: // RFC specifications
0407: if (p_base != null) {
0408:
0409: // check to see if this is the current doc - RFC 2396 5.2 #2
0410: // note that this is slightly different from the RFC spec in that
0411: // we don't include the check for query string being null
0412: // - this handles cases where the urispec is just a query
0413: // string or a fragment (e.g. "?y" or "#s") -
0414: // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
0415: // identified this as a bug in the RFC
0416: if (m_path.length() == 0 && m_scheme == null
0417: && m_host == null) {
0418: m_scheme = p_base.getScheme();
0419: m_userinfo = p_base.getUserinfo();
0420: m_host = p_base.getHost();
0421: m_port = p_base.getPort();
0422: m_path = p_base.getPath();
0423:
0424: if (m_queryString == null) {
0425: m_queryString = p_base.getQueryString();
0426: }
0427: return;
0428: }
0429:
0430: // check for scheme - RFC 2396 5.2 #3
0431: // if we found a scheme, it means absolute URI, so we're done
0432: if (m_scheme == null) {
0433: m_scheme = p_base.getScheme();
0434: } else {
0435: return;
0436: }
0437:
0438: // check for authority - RFC 2396 5.2 #4
0439: // if we found a host, then we've got a network path, so we're done
0440: if (m_host == null) {
0441: m_userinfo = p_base.getUserinfo();
0442: m_host = p_base.getHost();
0443: m_port = p_base.getPort();
0444: } else {
0445: return;
0446: }
0447:
0448: // check for absolute path - RFC 2396 5.2 #5
0449: if (m_path.length() > 0 && m_path.startsWith("/")) {
0450: return;
0451: }
0452:
0453: // if we get to this point, we need to resolve relative path
0454: // RFC 2396 5.2 #6
0455: String path = new String();
0456: String basePath = p_base.getPath();
0457:
0458: // 6a - get all but the last segment of the base URI path
0459: if (basePath != null) {
0460: int lastSlash = basePath.lastIndexOf('/');
0461: if (lastSlash != -1) {
0462: path = basePath.substring(0, lastSlash + 1);
0463: }
0464: }
0465:
0466: // 6b - append the relative URI path
0467: path = path.concat(m_path);
0468:
0469: // 6c - remove all "./" where "." is a complete path segment
0470: index = -1;
0471: while ((index = path.indexOf("/./")) != -1) {
0472: path = path.substring(0, index + 1).concat(
0473: path.substring(index + 3));
0474: }
0475:
0476: // 6d - remove "." if path ends with "." as a complete path segment
0477: if (path.endsWith("/.")) {
0478: path = path.substring(0, path.length() - 1);
0479: }
0480:
0481: // 6e - remove all "<segment>/../" where "<segment>" is a complete
0482: // path segment not equal to ".."
0483: index = 1;
0484: int segIndex = -1;
0485: String tempString = null;
0486:
0487: while ((index = path.indexOf("/../", index)) > 0) {
0488: tempString = path.substring(0, path.indexOf("/../"));
0489: segIndex = tempString.lastIndexOf('/');
0490: if (segIndex != -1) {
0491: if (!tempString.substring(segIndex++).equals("..")) {
0492: path = path.substring(0, segIndex).concat(
0493: path.substring(index + 4));
0494: } else
0495: index += 4;
0496: } else
0497: index += 4;
0498: }
0499:
0500: // 6f - remove ending "<segment>/.." where "<segment>" is a
0501: // complete path segment
0502: if (path.endsWith("/..")) {
0503: tempString = path.substring(0, path.length() - 3);
0504: segIndex = tempString.lastIndexOf('/');
0505: if (segIndex != -1) {
0506: path = path.substring(0, segIndex + 1);
0507: }
0508: }
0509: m_path = path;
0510: }
0511: }
0512:
0513: /**
0514: * Initialize the scheme for this URI from a URI string spec.
0515: *
0516: * @param p_uriSpec the URI specification (cannot be null)
0517: *
0518: * @exception MalformedURIException if URI does not have a conformant
0519: * scheme
0520: */
0521: private void initializeScheme(String p_uriSpec)
0522: throws MalformedURIException {
0523: int uriSpecLen = p_uriSpec.length();
0524: int index = 0;
0525: String scheme = null;
0526: char testChar = '\0';
0527:
0528: while (index < uriSpecLen) {
0529: testChar = p_uriSpec.charAt(index);
0530: if (testChar == ':' || testChar == '/' || testChar == '?'
0531: || testChar == '#') {
0532: break;
0533: }
0534: index++;
0535: }
0536: scheme = p_uriSpec.substring(0, index);
0537:
0538: if (scheme.length() == 0) {
0539: throw new MalformedURIException("No scheme found in URI.");
0540: } else {
0541: setScheme(scheme);
0542: }
0543: }
0544:
0545: /**
0546: * Initialize the authority (userinfo, host and port) for this
0547: * URI from a URI string spec.
0548: *
0549: * @param p_uriSpec the URI specification (cannot be null)
0550: *
0551: * @exception MalformedURIException if p_uriSpec violates syntax rules
0552: */
0553: private void initializeAuthority(String p_uriSpec)
0554: throws MalformedURIException {
0555: int index = 0;
0556: int start = 0;
0557: int end = p_uriSpec.length();
0558: char testChar = '\0';
0559: String userinfo = null;
0560:
0561: // userinfo is everything up @
0562: if (p_uriSpec.indexOf('@', start) != -1) {
0563: while (index < end) {
0564: testChar = p_uriSpec.charAt(index);
0565: if (testChar == '@') {
0566: break;
0567: }
0568: index++;
0569: }
0570: userinfo = p_uriSpec.substring(start, index);
0571: index++;
0572: }
0573:
0574: // host is everything up to ':'
0575: String host = null;
0576: start = index;
0577: while (index < end) {
0578: testChar = p_uriSpec.charAt(index);
0579: if (testChar == ':') {
0580: break;
0581: }
0582: index++;
0583: }
0584: host = p_uriSpec.substring(start, index);
0585: int port = -1;
0586: if (host.length() > 0) {
0587: // port
0588: if (testChar == ':') {
0589: index++;
0590: start = index;
0591: while (index < end) {
0592: index++;
0593: }
0594: String portStr = p_uriSpec.substring(start, index);
0595: if (portStr.length() > 0) {
0596: for (int i = 0; i < portStr.length(); i++) {
0597: if (!isDigit(portStr.charAt(i))) {
0598: throw new MalformedURIException(
0599: portStr
0600: + " is invalid. Port should only contain digits!");
0601: }
0602: }
0603: try {
0604: port = Integer.parseInt(portStr);
0605: } catch (NumberFormatException nfe) {
0606: // can't happen
0607: }
0608: }
0609: }
0610: }
0611: setHost(host);
0612: setPort(port);
0613: setUserinfo(userinfo);
0614: }
0615:
0616: /**
0617: * Initialize the path for this URI from a URI string spec.
0618: *
0619: * @param p_uriSpec the URI specification (cannot be null)
0620: *
0621: * @exception MalformedURIException if p_uriSpec violates syntax rules
0622: */
0623: private void initializePath(String p_uriSpec)
0624: throws MalformedURIException {
0625: if (p_uriSpec == null) {
0626: throw new MalformedURIException(
0627: "Cannot initialize path from null string!");
0628: }
0629:
0630: int index = 0;
0631: int start = 0;
0632: int end = p_uriSpec.length();
0633: char testChar = '\0';
0634:
0635: // path - everything up to query string or fragment
0636: while (index < end) {
0637: testChar = p_uriSpec.charAt(index);
0638: if (testChar == '?' || testChar == '#') {
0639: break;
0640: }
0641: // check for valid escape sequence
0642: if (testChar == '%') {
0643: if (index + 2 >= end
0644: || !isHex(p_uriSpec.charAt(index + 1))
0645: || !isHex(p_uriSpec.charAt(index + 2))) {
0646: throw new MalformedURIException(
0647: "Path contains invalid escape sequence!");
0648: }
0649: } else if (!isReservedCharacter(testChar)
0650: && !isUnreservedCharacter(testChar)) {
0651: throw new MalformedURIException(
0652: "Path contains invalid character: " + testChar);
0653: }
0654: index++;
0655: }
0656: m_path = p_uriSpec.substring(start, index);
0657:
0658: // query - starts with ? and up to fragment or end
0659: if (testChar == '?') {
0660: index++;
0661: start = index;
0662: while (index < end) {
0663: testChar = p_uriSpec.charAt(index);
0664: if (testChar == '#') {
0665: break;
0666: }
0667: if (testChar == '%') {
0668: if (index + 2 >= end
0669: || !isHex(p_uriSpec.charAt(index + 1))
0670: || !isHex(p_uriSpec.charAt(index + 2))) {
0671: throw new MalformedURIException(
0672: "Query string contains invalid escape sequence!");
0673: }
0674: } else if (!isReservedCharacter(testChar)
0675: && !isUnreservedCharacter(testChar)) {
0676: throw new MalformedURIException(
0677: "Query string contains invalid character:"
0678: + testChar);
0679: }
0680: index++;
0681: }
0682: m_queryString = p_uriSpec.substring(start, index);
0683: }
0684:
0685: // fragment - starts with #
0686: if (testChar == '#') {
0687: index++;
0688: start = index;
0689: while (index < end) {
0690: testChar = p_uriSpec.charAt(index);
0691:
0692: if (testChar == '%') {
0693: if (index + 2 >= end
0694: || !isHex(p_uriSpec.charAt(index + 1))
0695: || !isHex(p_uriSpec.charAt(index + 2))) {
0696: throw new MalformedURIException(
0697: "Fragment contains invalid escape sequence!");
0698: }
0699: } else if (!isReservedCharacter(testChar)
0700: && !isUnreservedCharacter(testChar)) {
0701: throw new MalformedURIException(
0702: "Fragment contains invalid character:"
0703: + testChar);
0704: }
0705: index++;
0706: }
0707: m_fragment = p_uriSpec.substring(start, index);
0708: }
0709: }
0710:
0711: /**
0712: * Get the scheme for this URI.
0713: *
0714: * @return the scheme for this URI
0715: */
0716: public String getScheme() {
0717: return m_scheme;
0718: }
0719:
0720: /**
0721: * Get the scheme-specific part for this URI (everything following the
0722: * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
0723: *
0724: * @return the scheme-specific part for this URI
0725: */
0726: public String getSchemeSpecificPart() {
0727: StringBuffer schemespec = new StringBuffer();
0728:
0729: if (m_userinfo != null || m_host != null || m_port != -1) {
0730: schemespec.append("//");
0731: }
0732:
0733: if (m_userinfo != null) {
0734: schemespec.append(m_userinfo);
0735: schemespec.append('@');
0736: }
0737:
0738: if (m_host != null) {
0739: schemespec.append(m_host);
0740: }
0741:
0742: if (m_port != -1) {
0743: schemespec.append(':');
0744: schemespec.append(m_port);
0745: }
0746:
0747: if (m_path != null) {
0748: schemespec.append((m_path));
0749: }
0750:
0751: if (m_queryString != null) {
0752: schemespec.append('?');
0753: schemespec.append(m_queryString);
0754: }
0755:
0756: if (m_fragment != null) {
0757: schemespec.append('#');
0758: schemespec.append(m_fragment);
0759: }
0760:
0761: return schemespec.toString();
0762: }
0763:
0764: /**
0765: * Get the userinfo for this URI.
0766: *
0767: * @return the userinfo for this URI (null if not specified).
0768: */
0769: public String getUserinfo() {
0770: return m_userinfo;
0771: }
0772:
0773: /**
0774: * Get the host for this URI.
0775: *
0776: * @return the host for this URI (null if not specified).
0777: */
0778: public String getHost() {
0779: return m_host;
0780: }
0781:
0782: /**
0783: * Get the port for this URI.
0784: *
0785: * @return the port for this URI (-1 if not specified).
0786: */
0787: public int getPort() {
0788: return m_port;
0789: }
0790:
0791: /**
0792: * Get the path for this URI (optionally with the query string and
0793: * fragment).
0794: *
0795: * @param p_includeQueryString if true (and query string is not null),
0796: * then a "?" followed by the query string
0797: * will be appended
0798: * @param p_includeFragment if true (and fragment is not null),
0799: * then a "#" followed by the fragment
0800: * will be appended
0801: *
0802: * @return the path for this URI possibly including the query string
0803: * and fragment
0804: */
0805: public String getPath(boolean p_includeQueryString,
0806: boolean p_includeFragment) {
0807: StringBuffer pathString = new StringBuffer(m_path);
0808:
0809: if (p_includeQueryString && m_queryString != null) {
0810: pathString.append('?');
0811: pathString.append(m_queryString);
0812: }
0813:
0814: if (p_includeFragment && m_fragment != null) {
0815: pathString.append('#');
0816: pathString.append(m_fragment);
0817: }
0818: return pathString.toString();
0819: }
0820:
0821: /**
0822: * Get the path for this URI. Note that the value returned is the path
0823: * only and does not include the query string or fragment.
0824: *
0825: * @return the path for this URI.
0826: */
0827: public String getPath() {
0828: return m_path;
0829: }
0830:
0831: /**
0832: * Get the query string for this URI.
0833: *
0834: * @return the query string for this URI. Null is returned if there
0835: * was no "?" in the URI spec, empty string if there was a
0836: * "?" but no query string following it.
0837: */
0838: public String getQueryString() {
0839: return m_queryString;
0840: }
0841:
0842: /**
0843: * Get the fragment for this URI.
0844: *
0845: * @return the fragment for this URI. Null is returned if there
0846: * was no "#" in the URI spec, empty string if there was a
0847: * "#" but no fragment following it.
0848: */
0849: public String getFragment() {
0850: return m_fragment;
0851: }
0852:
0853: /**
0854: * Set the scheme for this URI. The scheme is converted to lowercase
0855: * before it is set.
0856: *
0857: * @param p_scheme the scheme for this URI (cannot be null)
0858: *
0859: * @exception MalformedURIException if p_scheme is not a conformant
0860: * scheme name
0861: */
0862: public void setScheme(String p_scheme) throws MalformedURIException {
0863: if (p_scheme == null) {
0864: throw new MalformedURIException(
0865: "Cannot set scheme from null string!");
0866: }
0867: if (!isConformantSchemeName(p_scheme)) {
0868: throw new MalformedURIException(
0869: "The scheme is not conformant.");
0870: }
0871:
0872: m_scheme = p_scheme.toLowerCase();
0873: }
0874:
0875: /**
0876: * Set the userinfo for this URI. If a non-null value is passed in and
0877: * the host value is null, then an exception is thrown.
0878: *
0879: * @param p_userinfo the userinfo for this URI
0880: *
0881: * @exception MalformedURIException if p_userinfo contains invalid
0882: * characters
0883: */
0884: public void setUserinfo(String p_userinfo)
0885: throws MalformedURIException {
0886: if (p_userinfo == null) {
0887: m_userinfo = null;
0888: } else {
0889: if (m_host == null) {
0890: throw new MalformedURIException(
0891: "Userinfo cannot be set when host is null!");
0892: }
0893:
0894: // userinfo can contain alphanumerics, mark characters, escaped
0895: // and ';',':','&','=','+','$',','
0896: int index = 0;
0897: int end = p_userinfo.length();
0898: char testChar = '\0';
0899: while (index < end) {
0900: testChar = p_userinfo.charAt(index);
0901: if (testChar == '%') {
0902: if (index + 2 >= end
0903: || !isHex(p_userinfo.charAt(index + 1))
0904: || !isHex(p_userinfo.charAt(index + 2))) {
0905: throw new MalformedURIException(
0906: "Userinfo contains invalid escape sequence!");
0907: }
0908: } else if (!isUnreservedCharacter(testChar)
0909: && USERINFO_CHARACTERS.indexOf(testChar) == -1) {
0910: throw new MalformedURIException(
0911: "Userinfo contains invalid character:"
0912: + testChar);
0913: }
0914: index++;
0915: }
0916: }
0917: m_userinfo = p_userinfo;
0918: }
0919:
0920: /**
0921: * Set the host for this URI. If null is passed in, the userinfo
0922: * field is also set to null and the port is set to -1.
0923: *
0924: * @param p_host the host for this URI
0925: *
0926: * @exception MalformedURIException if p_host is not a valid IP
0927: * address or DNS hostname.
0928: */
0929: public void setHost(String p_host) throws MalformedURIException {
0930: if (p_host == null || p_host.trim().length() == 0) {
0931: m_host = p_host;
0932: m_userinfo = null;
0933: m_port = -1;
0934: } else if (!isWellFormedAddress(p_host)) {
0935: throw new MalformedURIException(
0936: "Host is not a well formed address!");
0937: }
0938: m_host = p_host;
0939: }
0940:
0941: /**
0942: * Set the port for this URI. -1 is used to indicate that the port is
0943: * not specified, otherwise valid port numbers are between 0 and 65535.
0944: * If a valid port number is passed in and the host field is null,
0945: * an exception is thrown.
0946: *
0947: * @param p_port the port number for this URI
0948: *
0949: * @exception MalformedURIException if p_port is not -1 and not a
0950: * valid port number
0951: */
0952: public void setPort(int p_port) throws MalformedURIException {
0953: if (p_port >= 0 && p_port <= 65535) {
0954: if (m_host == null) {
0955: throw new MalformedURIException(
0956: "Port cannot be set when host is null!");
0957: }
0958: } else if (p_port != -1) {
0959: throw new MalformedURIException("Invalid port number!");
0960: }
0961: m_port = p_port;
0962: }
0963:
0964: /**
0965: * Set the path for this URI. If the supplied path is null, then the
0966: * query string and fragment are set to null as well. If the supplied
0967: * path includes a query string and/or fragment, these fields will be
0968: * parsed and set as well. Note that, for URIs following the "generic
0969: * URI" syntax, the path specified should start with a slash.
0970: * For URIs that do not follow the generic URI syntax, this method
0971: * sets the scheme-specific part.
0972: *
0973: * @param p_path the path for this URI (may be null)
0974: *
0975: * @exception MalformedURIException if p_path contains invalid
0976: * characters
0977: */
0978: public void setPath(String p_path) throws MalformedURIException {
0979: if (p_path == null) {
0980: m_path = null;
0981: m_queryString = null;
0982: m_fragment = null;
0983: } else {
0984: initializePath(p_path);
0985: }
0986: }
0987:
0988: /**
0989: * Append to the end of the path of this URI. If the current path does
0990: * not end in a slash and the path to be appended does not begin with
0991: * a slash, a slash will be appended to the current path before the
0992: * new segment is added. Also, if the current path ends in a slash
0993: * and the new segment begins with a slash, the extra slash will be
0994: * removed before the new segment is appended.
0995: *
0996: * @param p_addToPath the new segment to be added to the current path
0997: *
0998: * @exception MalformedURIException if p_addToPath contains syntax
0999: * errors
1000: */
1001: public void appendPath(String p_addToPath)
1002: throws MalformedURIException {
1003: if (p_addToPath == null || p_addToPath.trim().length() == 0) {
1004: return;
1005: }
1006:
1007: if (!isURIString(p_addToPath)) {
1008: throw new MalformedURIException(
1009: "Path contains invalid character!");
1010: }
1011:
1012: if (m_path == null || m_path.trim().length() == 0) {
1013: if (p_addToPath.startsWith("/")) {
1014: m_path = p_addToPath;
1015: } else {
1016: m_path = "/" + p_addToPath;
1017: }
1018: } else if (m_path.endsWith("/")) {
1019: if (p_addToPath.startsWith("/")) {
1020: m_path = m_path.concat(p_addToPath.substring(1));
1021: } else {
1022: m_path = m_path.concat(p_addToPath);
1023: }
1024: } else {
1025: if (p_addToPath.startsWith("/")) {
1026: m_path = m_path.concat(p_addToPath);
1027: } else {
1028: m_path = m_path.concat("/" + p_addToPath);
1029: }
1030: }
1031: }
1032:
1033: /**
1034: * Set the query string for this URI. A non-null value is valid only
1035: * if this is an URI conforming to the generic URI syntax and
1036: * the path value is not null.
1037: *
1038: * @param p_queryString the query string for this URI
1039: *
1040: * @exception MalformedURIException if p_queryString is not null and this
1041: * URI does not conform to the generic
1042: * URI syntax or if the path is null
1043: */
1044: public void setQueryString(String p_queryString)
1045: throws MalformedURIException {
1046: if (p_queryString == null) {
1047: m_queryString = null;
1048: } else if (!isGenericURI()) {
1049: throw new MalformedURIException(
1050: "Query string can only be set for a generic URI!");
1051: } else if (getPath() == null) {
1052: throw new MalformedURIException(
1053: "Query string cannot be set when path is null!");
1054: } else if (!isURIString(p_queryString)) {
1055: throw new MalformedURIException(
1056: "Query string contains invalid character!");
1057: } else {
1058: m_queryString = p_queryString;
1059: }
1060: }
1061:
1062: /**
1063: * Set the fragment for this URI. A non-null value is valid only
1064: * if this is a URI conforming to the generic URI syntax and
1065: * the path value is not null.
1066: *
1067: * @param p_fragment the fragment for this URI
1068: *
1069: * @exception MalformedURIException if p_fragment is not null and this
1070: * URI does not conform to the generic
1071: * URI syntax or if the path is null
1072: */
1073: public void setFragment(String p_fragment)
1074: throws MalformedURIException {
1075: if (p_fragment == null) {
1076: m_fragment = null;
1077: } else if (!isGenericURI()) {
1078: throw new MalformedURIException(
1079: "Fragment can only be set for a generic URI!");
1080: } else if (getPath() == null) {
1081: throw new MalformedURIException(
1082: "Fragment cannot be set when path is null!");
1083: } else if (!isURIString(p_fragment)) {
1084: throw new MalformedURIException(
1085: "Fragment contains invalid character!");
1086: } else {
1087: m_fragment = p_fragment;
1088: }
1089: }
1090:
1091: /**
1092: * Determines if the passed-in Object is equivalent to this URI.
1093: *
1094: * @param p_test the Object to test for equality.
1095: *
1096: * @return true if p_test is a URI with all values equal to this
1097: * URI, false otherwise
1098: */
1099: public boolean equals(Object p_test) {
1100: if (p_test instanceof JaxmURI) {
1101: JaxmURI testURI = (JaxmURI) p_test;
1102: if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null
1103: && testURI.m_scheme != null && m_scheme
1104: .equals(testURI.m_scheme)))
1105: && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null
1106: && testURI.m_userinfo != null && m_userinfo
1107: .equals(testURI.m_userinfo)))
1108: && ((m_host == null && testURI.m_host == null) || (m_host != null
1109: && testURI.m_host != null && m_host
1110: .equals(testURI.m_host)))
1111: && m_port == testURI.m_port
1112: && ((m_path == null && testURI.m_path == null) || (m_path != null
1113: && testURI.m_path != null && m_path
1114: .equals(testURI.m_path)))
1115: && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null
1116: && testURI.m_queryString != null && m_queryString
1117: .equals(testURI.m_queryString)))
1118: && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null
1119: && testURI.m_fragment != null && m_fragment
1120: .equals(testURI.m_fragment)))) {
1121: return true;
1122: }
1123: }
1124: return false;
1125: }
1126:
1127: /**
1128: * Get the URI as a string specification. See RFC 2396 Section 5.2.
1129: *
1130: * @return the URI string specification
1131: */
1132: public String toString() {
1133: StringBuffer uriSpecString = new StringBuffer();
1134:
1135: if (m_scheme != null) {
1136: uriSpecString.append(m_scheme);
1137: uriSpecString.append(':');
1138: }
1139: uriSpecString.append(getSchemeSpecificPart());
1140: return uriSpecString.toString();
1141: }
1142:
1143: /**
1144: * Get the indicator as to whether this URI uses the "generic URI"
1145: * syntax.
1146: *
1147: * @return true if this URI uses the "generic URI" syntax, false
1148: * otherwise
1149: */
1150: public boolean isGenericURI() {
1151: // presence of the host (whether valid or empty) means
1152: // double-slashes which means generic uri
1153: return (m_host != null);
1154: }
1155:
1156: /**
1157: * Determine whether a scheme conforms to the rules for a scheme name.
1158: * A scheme is conformant if it starts with an alphanumeric, and
1159: * contains only alphanumerics, '+','-' and '.'.
1160: *
1161: * @return true if the scheme is conformant, false otherwise
1162: */
1163: public static boolean isConformantSchemeName(String p_scheme) {
1164: if (p_scheme == null || p_scheme.trim().length() == 0) {
1165: return false;
1166: }
1167:
1168: if (!isAlpha(p_scheme.charAt(0))) {
1169: return false;
1170: }
1171:
1172: char testChar;
1173: for (int i = 1; i < p_scheme.length(); i++) {
1174: testChar = p_scheme.charAt(i);
1175: if (!isAlphanum(testChar)
1176: && SCHEME_CHARACTERS.indexOf(testChar) == -1) {
1177: return false;
1178: }
1179: }
1180:
1181: return true;
1182: }
1183:
1184: /**
1185: * Determine whether a string is syntactically capable of representing
1186: * a valid IPv4 address or the domain name of a network host. A valid
1187: * IPv4 address consists of four decimal digit groups separated by a
1188: * '.'. A hostname consists of domain labels (each of which must
1189: * begin and end with an alphanumeric but may contain '-') separated
1190: & by a '.'. See RFC 2396 Section 3.2.2.
1191: *
1192: * @return true if the string is a syntactically valid IPv4 address
1193: * or hostname
1194: */
1195: public static boolean isWellFormedAddress(String p_address) {
1196: if (p_address == null) {
1197: return false;
1198: }
1199:
1200: String address = p_address.trim();
1201: int addrLength = address.length();
1202: if (addrLength == 0 || addrLength > 255) {
1203: return false;
1204: }
1205:
1206: if (address.startsWith(".") || address.startsWith("-")) {
1207: return false;
1208: }
1209:
1210: // rightmost domain label starting with digit indicates IP address
1211: // since top level domain label can only start with an alpha
1212: // see RFC 2396 Section 3.2.2
1213: int index = address.lastIndexOf('.');
1214: if (address.endsWith(".")) {
1215: index = address.substring(0, index).lastIndexOf('.');
1216: }
1217:
1218: if (index + 1 < addrLength
1219: && isDigit(p_address.charAt(index + 1))) {
1220: char testChar;
1221: int numDots = 0;
1222:
1223: // make sure that 1) we see only digits and dot separators, 2) that
1224: // any dot separator is preceded and followed by a digit and
1225: // 3) that we find 3 dots
1226: for (int i = 0; i < addrLength; i++) {
1227: testChar = address.charAt(i);
1228: if (testChar == '.') {
1229: if (!isDigit(address.charAt(i - 1))
1230: || (i + 1 < addrLength && !isDigit(address
1231: .charAt(i + 1)))) {
1232: return false;
1233: }
1234: numDots++;
1235: } else if (!isDigit(testChar)) {
1236: return false;
1237: }
1238: }
1239: if (numDots != 3) {
1240: return false;
1241: }
1242: } else {
1243: // domain labels can contain alphanumerics and '-"
1244: // but must start and end with an alphanumeric
1245: char testChar;
1246:
1247: for (int i = 0; i < addrLength; i++) {
1248: testChar = address.charAt(i);
1249: if (testChar == '.') {
1250: if (!isAlphanum(address.charAt(i - 1))) {
1251: return false;
1252: }
1253: if (i + 1 < addrLength
1254: && !isAlphanum(address.charAt(i + 1))) {
1255: return false;
1256: }
1257: } else if (!isAlphanum(testChar) && testChar != '-') {
1258: return false;
1259: }
1260: }
1261: }
1262: return true;
1263: }
1264:
1265: /**
1266: * Determine whether a char is a digit.
1267: *
1268: * @return true if the char is betweeen '0' and '9', false otherwise
1269: */
1270: private static boolean isDigit(char p_char) {
1271: return p_char >= '0' && p_char <= '9';
1272: }
1273:
1274: /**
1275: * Determine whether a character is a hexadecimal character.
1276: *
1277: * @return true if the char is betweeen '0' and '9', 'a' and 'f'
1278: * or 'A' and 'F', false otherwise
1279: */
1280: private static boolean isHex(char p_char) {
1281: return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f') || (p_char >= 'A' && p_char <= 'F'));
1282: }
1283:
1284: /**
1285: * Determine whether a char is an alphabetic character: a-z or A-Z
1286: *
1287: * @return true if the char is alphabetic, false otherwise
1288: */
1289: private static boolean isAlpha(char p_char) {
1290: return ((p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z'));
1291: }
1292:
1293: /**
1294: * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
1295: *
1296: * @return true if the char is alphanumeric, false otherwise
1297: */
1298: private static boolean isAlphanum(char p_char) {
1299: return (isAlpha(p_char) || isDigit(p_char));
1300: }
1301:
1302: /**
1303: * Determine whether a character is a reserved character:
1304: * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
1305: *
1306: * @return true if the string contains any reserved characters
1307: */
1308: private static boolean isReservedCharacter(char p_char) {
1309: return RESERVED_CHARACTERS.indexOf(p_char) != -1;
1310: }
1311:
1312: /**
1313: * Determine whether a char is an unreserved character.
1314: *
1315: * @return true if the char is unreserved, false otherwise
1316: */
1317: private static boolean isUnreservedCharacter(char p_char) {
1318: return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1);
1319: }
1320:
1321: /**
1322: * Determine whether a given string contains only URI characters (also
1323: * called "uric" in RFC 2396). uric consist of all reserved
1324: * characters, unreserved characters and escaped characters.
1325: *
1326: * @return true if the string is comprised of uric, false otherwise
1327: */
1328: private static boolean isURIString(String p_uric) {
1329: if (p_uric == null) {
1330: return false;
1331: }
1332: int end = p_uric.length();
1333: char testChar = '\0';
1334: for (int i = 0; i < end; i++) {
1335: testChar = p_uric.charAt(i);
1336: if (testChar == '%') {
1337: if (i + 2 >= end || !isHex(p_uric.charAt(i + 1))
1338: || !isHex(p_uric.charAt(i + 2))) {
1339: return false;
1340: } else {
1341: i += 2;
1342: continue;
1343: }
1344: }
1345: if (isReservedCharacter(testChar)
1346: || isUnreservedCharacter(testChar)) {
1347: continue;
1348: } else {
1349: return false;
1350: }
1351: }
1352: return true;
1353: }
1354: }
|