0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: package java.net;
0019:
0020: import java.io.IOException;
0021: import java.io.ObjectInputStream;
0022: import java.io.ObjectOutputStream;
0023: import java.io.Serializable;
0024: import java.io.UnsupportedEncodingException;
0025: import java.util.StringTokenizer;
0026:
0027: import org.apache.harmony.luni.util.Msg;
0028:
0029: /**
0030: * This class represents an instance of a URI as defined by RFC 2396.
0031: */
0032: public final class URI implements Comparable<URI>, Serializable {
0033:
0034: private static final long serialVersionUID = -6052424284110960213l;
0035:
0036: static final String unreserved = "_-!.~\'()*"; //$NON-NLS-1$
0037:
0038: static final String punct = ",;:$&+="; //$NON-NLS-1$
0039:
0040: static final String reserved = punct + "?/[]@"; //$NON-NLS-1$
0041:
0042: static final String someLegal = unreserved + punct;
0043:
0044: static final String allLegal = unreserved + reserved;
0045:
0046: private String string;
0047:
0048: private transient String scheme;
0049:
0050: private transient String schemespecificpart;
0051:
0052: private transient String authority;
0053:
0054: private transient String userinfo;
0055:
0056: private transient String host;
0057:
0058: private transient int port = -1;
0059:
0060: private transient String path;
0061:
0062: private transient String query;
0063:
0064: private transient String fragment;
0065:
0066: private transient boolean opaque;
0067:
0068: private transient boolean absolute;
0069:
0070: private transient boolean serverAuthority = false;
0071:
0072: private transient int hash = -1;
0073:
/**
 * Creates an instance with every component left at its default value.
 * Used by duplicate(), which fills in the fields itself.
 */
private URI() {
    // nothing to initialise
}
0076:
/**
 * Creates a URI by parsing the given string. The authority is not
 * required to be server-based.
 *
 * @param uri
 *            the text to parse
 * @throws URISyntaxException
 *             if the text cannot be parsed
 */
public URI(String uri) throws URISyntaxException {
    Helper parser = new Helper();
    parser.parseURI(uri, false);
}
0080:
/**
 * Creates a URI from a scheme, a scheme-specific part and a fragment.
 * Characters outside the full legal set are quoted in the scheme-specific
 * part and the fragment; the assembled text is then parsed.
 *
 * @param scheme
 *            the scheme, or null for none
 * @param ssp
 *            the scheme-specific part, or null for none
 * @param frag
 *            the fragment, or null for none
 * @throws URISyntaxException
 *             if the assembled text cannot be parsed
 */
public URI(String scheme, String ssp, String frag)
        throws URISyntaxException {
    StringBuilder text = new StringBuilder();
    if (scheme != null) {
        text.append(scheme).append(':');
    }
    if (ssp != null) {
        text.append(quoteComponent(ssp, allLegal));
    }
    if (frag != null) {
        text.append('#').append(quoteComponent(frag, allLegal));
    }

    Helper parser = new Helper();
    parser.parseURI(text.toString(), false);
}
0100:
/**
 * Creates a URI from its individual components and parses the assembled
 * text, requiring a server-based authority.
 *
 * If every String argument is null the instance gets an empty path and
 * nothing is parsed. A host that contains ':' but neither '[' nor ']' is
 * wrapped in square brackets before being appended.
 *
 * @param scheme
 *            the scheme, or null
 * @param userinfo
 *            the user-info, or null
 * @param host
 *            the host, or null
 * @param port
 *            the port, or -1 for none
 * @param path
 *            the path, or null
 * @param query
 *            the query, or null
 * @param fragment
 *            the fragment, or null
 * @throws URISyntaxException
 *             if a scheme is given together with a non-empty path that
 *             does not start with '/', or if the assembled text cannot be
 *             parsed
 */
public URI(String scheme, String userinfo, String host, int port,
        String path, String query, String fragment)
        throws URISyntaxException {

    boolean nothingGiven = scheme == null && userinfo == null
            && host == null && path == null && query == null
            && fragment == null;
    if (nothingGiven) {
        this.path = ""; //$NON-NLS-1$
        return;
    }

    boolean relativePath = path != null && path.length() > 0
            && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, Msg.getString("K0302")); //$NON-NLS-1$
    }

    StringBuilder text = new StringBuilder();
    if (scheme != null) {
        text.append(scheme).append(':');
    }

    boolean hasAuthority = userinfo != null || host != null || port != -1;
    if (hasAuthority) {
        text.append("//"); //$NON-NLS-1$
    }

    if (userinfo != null) {
        // quote illegal characters in the user-info
        text.append(quoteComponent(userinfo, someLegal)).append('@');
    }

    if (host != null) {
        // an IPv6 literal that has not been enclosed in square brackets
        boolean bareIPv6 = host.indexOf(':') != -1
                && host.indexOf(']') == -1 && host.indexOf('[') == -1;
        text.append(bareIPv6 ? "[" + host + "]" : host); //$NON-NLS-1$ //$NON-NLS-2$
    }

    if (port != -1) {
        text.append(':').append(port);
    }

    if (path != null) {
        text.append(quoteComponent(path, "/@" + someLegal)); //$NON-NLS-1$
    }

    if (query != null) {
        text.append('?').append(quoteComponent(query, allLegal));
    }

    if (fragment != null) {
        text.append('#').append(quoteComponent(fragment, allLegal));
    }

    Helper parser = new Helper();
    parser.parseURI(text.toString(), true);
}
0166:
/**
 * Creates a URI from a scheme, host, path and fragment by delegating to
 * the seven-argument constructor with no user-info, no port (-1) and no
 * query.
 *
 * @param scheme
 *            the scheme, or null
 * @param host
 *            the host, or null
 * @param path
 *            the path, or null
 * @param fragment
 *            the fragment, or null
 * @throws URISyntaxException
 *             as thrown by the seven-argument constructor
 */
public URI(String scheme, String host, String path, String fragment)
        throws URISyntaxException {
    this(scheme, null, host, -1, path, null, fragment);
}
0171:
/**
 * Creates a URI from a scheme, authority, path, query and fragment. Each
 * component is quoted against its legal character set, and the assembled
 * text is parsed without requiring a server-based authority.
 *
 * @param scheme
 *            the scheme, or null
 * @param authority
 *            the authority, or null
 * @param path
 *            the path, or null
 * @param query
 *            the query, or null
 * @param fragment
 *            the fragment, or null
 * @throws URISyntaxException
 *             if a scheme is given together with a non-empty path that
 *             does not start with '/', or if the assembled text cannot be
 *             parsed
 */
public URI(String scheme, String authority, String path,
        String query, String fragment) throws URISyntaxException {
    boolean relativePath = path != null && path.length() > 0
            && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, Msg.getString("K0302")); //$NON-NLS-1$
    }

    StringBuilder text = new StringBuilder();
    if (scheme != null) {
        text.append(scheme).append(':');
    }
    if (authority != null) {
        text.append("//"); //$NON-NLS-1$
        text.append(quoteComponent(authority, "@[]" + someLegal)); //$NON-NLS-1$
    }
    if (path != null) {
        text.append(quoteComponent(path, "/@" + someLegal)); //$NON-NLS-1$
    }
    if (query != null) {
        text.append('?').append(quoteComponent(query, allLegal));
    }
    if (fragment != null) {
        text.append('#').append(quoteComponent(fragment, allLegal));
    }

    Helper parser = new Helper();
    parser.parseURI(text.toString(), false);
}
0207:
0208: private class Helper {
0209:
0210: private void parseURI(String uri, boolean forceServer)
0211: throws URISyntaxException {
0212: String temp = uri;
0213: // assign uri string to the input value per spec
0214: string = uri;
0215: int index, index1, index2, index3;
0216: // parse into Fragment, Scheme, and SchemeSpecificPart
0217: // then parse SchemeSpecificPart if necessary
0218:
0219: // Fragment
0220: index = temp.indexOf('#');
0221: if (index != -1) {
0222: // remove the fragment from the end
0223: fragment = temp.substring(index + 1);
0224: validateFragment(uri, fragment, index + 1);
0225: temp = temp.substring(0, index);
0226: }
0227:
0228: // Scheme and SchemeSpecificPart
0229: index = index1 = temp.indexOf(':');
0230: index2 = temp.indexOf('/');
0231: index3 = temp.indexOf('?');
0232:
0233: // if a '/' or '?' occurs before the first ':' the uri has no
0234: // specified scheme, and is therefore not absolute
0235: if (index != -1 && (index2 >= index || index2 == -1)
0236: && (index3 >= index || index3 == -1)) {
0237: // the characters up to the first ':' comprise the scheme
0238: absolute = true;
0239: scheme = temp.substring(0, index);
0240: if (scheme.length() == 0) {
0241: throw new URISyntaxException(uri, Msg
0242: .getString("K0342"), //$NON-NLS-1$
0243: index);
0244: }
0245: validateScheme(uri, scheme, 0);
0246: schemespecificpart = temp.substring(index + 1);
0247: if (schemespecificpart.length() == 0) {
0248: throw new URISyntaxException(uri, Msg
0249: .getString("K0303"), //$NON-NLS-1$
0250: index + 1);
0251: }
0252: } else {
0253: absolute = false;
0254: schemespecificpart = temp;
0255: }
0256:
0257: if (scheme == null || schemespecificpart.length() > 0
0258: && schemespecificpart.charAt(0) == '/') {
0259: opaque = false;
0260: // the URI is hierarchical
0261:
0262: // Query
0263: temp = schemespecificpart;
0264: index = temp.indexOf('?');
0265: if (index != -1) {
0266: query = temp.substring(index + 1);
0267: temp = temp.substring(0, index);
0268: validateQuery(uri, query, index2 + 1 + index);
0269: }
0270:
0271: // Authority and Path
0272: if (temp.startsWith("//")) { //$NON-NLS-1$
0273: index = temp.indexOf('/', 2);
0274: if (index != -1) {
0275: authority = temp.substring(2, index);
0276: path = temp.substring(index);
0277: } else {
0278: authority = temp.substring(2);
0279: if (authority.length() == 0 && query == null
0280: && fragment == null) {
0281: throw new URISyntaxException(uri, Msg
0282: .getString("K0304"), uri.length()); //$NON-NLS-1$
0283: }
0284:
0285: path = ""; //$NON-NLS-1$
0286: // nothing left, so path is empty (not null, path should
0287: // never be null)
0288: }
0289:
0290: if (authority.length() == 0) {
0291: authority = null;
0292: } else {
0293: validateAuthority(uri, authority, index1 + 3);
0294: }
0295: } else { // no authority specified
0296: path = temp;
0297: }
0298:
0299: int pathIndex = 0;
0300: if (index2 > -1) {
0301: pathIndex += index2;
0302: }
0303: if (index > -1) {
0304: pathIndex += index;
0305: }
0306: validatePath(uri, path, pathIndex);
0307: } else { // if not hierarchical, URI is opaque
0308: opaque = true;
0309: validateSsp(uri, schemespecificpart, index2 + 2 + index);
0310: }
0311:
0312: parseAuthority(forceServer);
0313: }
0314:
0315: private void validateScheme(String uri, String scheme, int index)
0316: throws URISyntaxException {
0317: // first char needs to be an alpha char
0318: char ch = scheme.charAt(0);
0319: if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
0320: throw new URISyntaxException(uri, Msg
0321: .getString("K0305"), 0); //$NON-NLS-1$
0322: }
0323:
0324: try {
0325: URIEncoderDecoder.validateSimple(scheme, "+-."); //$NON-NLS-1$
0326: } catch (URISyntaxException e) {
0327: throw new URISyntaxException(uri, Msg
0328: .getString("K0305"), index //$NON-NLS-1$
0329: + e.getIndex());
0330: }
0331: }
0332:
0333: private void validateSsp(String uri, String ssp, int index)
0334: throws URISyntaxException {
0335: try {
0336: URIEncoderDecoder.validate(ssp, allLegal);
0337: } catch (URISyntaxException e) {
0338: throw new URISyntaxException(uri, Msg.getString(
0339: "K0306", e //$NON-NLS-1$
0340: .getReason()), index + e.getIndex());
0341: }
0342: }
0343:
0344: private void validateAuthority(String uri, String authority,
0345: int index) throws URISyntaxException {
0346: try {
0347: URIEncoderDecoder
0348: .validate(authority, "@[]" + someLegal); //$NON-NLS-1$
0349: } catch (URISyntaxException e) {
0350: throw new URISyntaxException(uri, Msg.getString(
0351: "K0307", e //$NON-NLS-1$
0352: .getReason()), index + e.getIndex());
0353: }
0354: }
0355:
0356: private void validatePath(String uri, String path, int index)
0357: throws URISyntaxException {
0358: try {
0359: URIEncoderDecoder.validate(path, "/@" + someLegal); //$NON-NLS-1$
0360: } catch (URISyntaxException e) {
0361: throw new URISyntaxException(uri, Msg.getString(
0362: "K0308", e //$NON-NLS-1$
0363: .getReason()), index + e.getIndex());
0364: }
0365: }
0366:
0367: private void validateQuery(String uri, String query, int index)
0368: throws URISyntaxException {
0369: try {
0370: URIEncoderDecoder.validate(query, allLegal);
0371: } catch (URISyntaxException e) {
0372: throw new URISyntaxException(uri, Msg.getString(
0373: "K0309", e //$NON-NLS-1$
0374: .getReason()), index + e.getIndex());
0375:
0376: }
0377: }
0378:
0379: private void validateFragment(String uri, String fragment,
0380: int index) throws URISyntaxException {
0381: try {
0382: URIEncoderDecoder.validate(fragment, allLegal);
0383: } catch (URISyntaxException e) {
0384: throw new URISyntaxException(uri, Msg.getString(
0385: "K030a", e //$NON-NLS-1$
0386: .getReason()), index + e.getIndex());
0387: }
0388: }
0389:
0390: /**
0391: * determine the host, port and userinfo if the authority parses
0392: * successfully to a server based authority
0393: *
0394: * behavour in error cases: if forceServer is true, throw
0395: * URISyntaxException with the proper diagnostic messages. if
0396: * forceServer is false assume this is a registry based uri, and just
0397: * return leaving the host, port and userinfo fields undefined.
0398: *
0399: * and there are some error cases where URISyntaxException is thrown
0400: * regardless of the forceServer parameter e.g. malformed ipv6 address
0401: */
0402: private void parseAuthority(boolean forceServer)
0403: throws URISyntaxException {
0404: if (authority == null) {
0405: return;
0406: }
0407:
0408: String temp, tempUserinfo = null, tempHost = null;
0409: int index, hostindex = 0;
0410: int tempPort = -1;
0411:
0412: temp = authority;
0413: index = temp.indexOf('@');
0414: if (index != -1) {
0415: // remove user info
0416: tempUserinfo = temp.substring(0, index);
0417: validateUserinfo(authority, tempUserinfo, 0);
0418: temp = temp.substring(index + 1); // host[:port] is left
0419: hostindex = index + 1;
0420: }
0421:
0422: index = temp.lastIndexOf(':');
0423: int endindex = temp.indexOf(']');
0424:
0425: if (index != -1 && endindex < index) {
0426: // determine port and host
0427: tempHost = temp.substring(0, index);
0428:
0429: if (index < (temp.length() - 1)) { // port part is not empty
0430: try {
0431: tempPort = Integer.parseInt(temp
0432: .substring(index + 1));
0433: if (tempPort < 0) {
0434: if (forceServer) {
0435: throw new URISyntaxException(
0436: authority,
0437: Msg.getString("K00b1"), hostindex + index + 1); //$NON-NLS-1$
0438: }
0439: return;
0440: }
0441: } catch (NumberFormatException e) {
0442: if (forceServer) {
0443: throw new URISyntaxException(
0444: authority,
0445: Msg.getString("K00b1"), hostindex + index + 1); //$NON-NLS-1$
0446: }
0447: return;
0448: }
0449: }
0450: } else {
0451: tempHost = temp;
0452: }
0453:
0454: if (tempHost.equals("")) { //$NON-NLS-1$
0455: if (forceServer) {
0456: throw new URISyntaxException(authority, Msg
0457: .getString("K030c"), hostindex); //$NON-NLS-1$
0458: }
0459: return;
0460: }
0461:
0462: if (!isValidHost(forceServer, tempHost)) {
0463: return;
0464: }
0465:
0466: // this is a server based uri,
0467: // fill in the userinfo, host and port fields
0468: userinfo = tempUserinfo;
0469: host = tempHost;
0470: port = tempPort;
0471: serverAuthority = true;
0472: }
0473:
0474: private void validateUserinfo(String uri, String userinfo,
0475: int index) throws URISyntaxException {
0476: for (int i = 0; i < userinfo.length(); i++) {
0477: char ch = userinfo.charAt(i);
0478: if (ch == ']' || ch == '[') {
0479: throw new URISyntaxException(uri, Msg
0480: .getString("K030d"), //$NON-NLS-1$
0481: index + i);
0482: }
0483: }
0484: }
0485:
0486: /**
0487: * distinguish between IPv4, IPv6, domain name and validate it based on
0488: * its type
0489: */
0490: private boolean isValidHost(boolean forceServer, String host)
0491: throws URISyntaxException {
0492: if (host.charAt(0) == '[') {
0493: // ipv6 address
0494: if (host.charAt(host.length() - 1) != ']') {
0495: throw new URISyntaxException(host, Msg
0496: .getString("K030e"), 0); //$NON-NLS-1$
0497: }
0498: if (!isValidIP6Address(host)) {
0499: throw new URISyntaxException(host, Msg
0500: .getString("K030f")); //$NON-NLS-1$
0501: }
0502: return true;
0503: }
0504:
0505: // '[' and ']' can only be the first char and last char
0506: // of the host name
0507: if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
0508: throw new URISyntaxException(host, Msg
0509: .getString("K0310"), 0); //$NON-NLS-1$
0510: }
0511:
0512: int index = host.lastIndexOf('.');
0513: if (index < 0 || index == host.length() - 1
0514: || !Character.isDigit(host.charAt(index + 1))) {
0515: // domain name
0516: if (isValidDomainName(host)) {
0517: return true;
0518: }
0519: if (forceServer) {
0520: throw new URISyntaxException(host, Msg
0521: .getString("K0310"), 0); //$NON-NLS-1$
0522: }
0523: return false;
0524: }
0525:
0526: // IPv4 address
0527: if (isValidIPv4Address(host)) {
0528: return true;
0529: }
0530: if (forceServer) {
0531: throw new URISyntaxException(host, Msg
0532: .getString("K0311"), 0); //$NON-NLS-1$
0533: }
0534: return false;
0535: }
0536:
0537: private boolean isValidDomainName(String host) {
0538: try {
0539: URIEncoderDecoder.validateSimple(host, "-."); //$NON-NLS-1$
0540: } catch (URISyntaxException e) {
0541: return false;
0542: }
0543:
0544: String label = null;
0545: StringTokenizer st = new StringTokenizer(host, "."); //$NON-NLS-1$
0546: while (st.hasMoreTokens()) {
0547: label = st.nextToken();
0548: if (label.startsWith("-") || label.endsWith("-")) { //$NON-NLS-1$ //$NON-NLS-2$
0549: return false;
0550: }
0551: }
0552:
0553: if (!label.equals(host)) {
0554: char ch = label.charAt(0);
0555: if (ch >= '0' && ch <= '9') {
0556: return false;
0557: }
0558: }
0559: return true;
0560: }
0561:
0562: private boolean isValidIPv4Address(String host) {
0563: int index;
0564: int index2;
0565: try {
0566: int num;
0567: index = host.indexOf('.');
0568: num = Integer.parseInt(host.substring(0, index));
0569: if (num < 0 || num > 255) {
0570: return false;
0571: }
0572: index2 = host.indexOf('.', index + 1);
0573: num = Integer.parseInt(host
0574: .substring(index + 1, index2));
0575: if (num < 0 || num > 255) {
0576: return false;
0577: }
0578: index = host.indexOf('.', index2 + 1);
0579: num = Integer.parseInt(host
0580: .substring(index2 + 1, index));
0581: if (num < 0 || num > 255) {
0582: return false;
0583: }
0584: num = Integer.parseInt(host.substring(index + 1));
0585: if (num < 0 || num > 255) {
0586: return false;
0587: }
0588: } catch (Exception e) {
0589: return false;
0590: }
0591: return true;
0592: }
0593:
0594: private boolean isValidIP6Address(String ipAddress) {
0595: int length = ipAddress.length();
0596: boolean doubleColon = false;
0597: int numberOfColons = 0;
0598: int numberOfPeriods = 0;
0599: String word = ""; //$NON-NLS-1$
0600: char c = 0;
0601: char prevChar = 0;
0602: int offset = 0; // offset for [] ip addresses
0603:
0604: if (length < 2) {
0605: return false;
0606: }
0607:
0608: for (int i = 0; i < length; i++) {
0609: prevChar = c;
0610: c = ipAddress.charAt(i);
0611: switch (c) {
0612:
0613: // case for an open bracket [x:x:x:...x]
0614: case '[':
0615: if (i != 0) {
0616: return false; // must be first character
0617: }
0618: if (ipAddress.charAt(length - 1) != ']') {
0619: return false; // must have a close ]
0620: }
0621: if ((ipAddress.charAt(1) == ':')
0622: && (ipAddress.charAt(2) != ':')) {
0623: return false;
0624: }
0625: offset = 1;
0626: if (length < 4) {
0627: return false;
0628: }
0629: break;
0630:
0631: // case for a closed bracket at end of IP [x:x:x:...x]
0632: case ']':
0633: if (i != length - 1) {
0634: return false; // must be last character
0635: }
0636: if (ipAddress.charAt(0) != '[') {
0637: return false; // must have a open [
0638: }
0639: break;
0640:
0641: // case for the last 32-bits represented as IPv4
0642: // x:x:x:x:x:x:d.d.d.d
0643: case '.':
0644: numberOfPeriods++;
0645: if (numberOfPeriods > 3) {
0646: return false;
0647: }
0648: if (!isValidIP4Word(word)) {
0649: return false;
0650: }
0651: if (numberOfColons != 6 && !doubleColon) {
0652: return false;
0653: }
0654: // a special case ::1:2:3:4:5:d.d.d.d allows 7 colons
0655: // with
0656: // an IPv4 ending, otherwise 7 :'s is bad
0657: if (numberOfColons == 7
0658: && ipAddress.charAt(0 + offset) != ':'
0659: && ipAddress.charAt(1 + offset) != ':') {
0660: return false;
0661: }
0662: word = ""; //$NON-NLS-1$
0663: break;
0664:
0665: case ':':
0666: numberOfColons++;
0667: if (numberOfColons > 7) {
0668: return false;
0669: }
0670: if (numberOfPeriods > 0) {
0671: return false;
0672: }
0673: if (prevChar == ':') {
0674: if (doubleColon) {
0675: return false;
0676: }
0677: doubleColon = true;
0678: }
0679: word = ""; //$NON-NLS-1$
0680: break;
0681:
0682: default:
0683: if (word.length() > 3) {
0684: return false;
0685: }
0686: if (!isValidHexChar(c)) {
0687: return false;
0688: }
0689: word += c;
0690: }
0691: }
0692:
0693: // Check if we have an IPv4 ending
0694: if (numberOfPeriods > 0) {
0695: if (numberOfPeriods != 3 || !isValidIP4Word(word)) {
0696: return false;
0697: }
0698: } else {
0699: // If we're at then end and we haven't had 7 colons then there
0700: // is a problem unless we encountered a doubleColon
0701: if (numberOfColons != 7 && !doubleColon) {
0702: return false;
0703: }
0704:
0705: // If we have an empty word at the end, it means we ended in
0706: // either a : or a .
0707: // If we did not end in :: then this is invalid
0708: if (word == "" && ipAddress.charAt(length - 1 - offset) != ':' //$NON-NLS-1$
0709: && ipAddress.charAt(length - 2 - offset) != ':') {
0710: return false;
0711: }
0712: }
0713:
0714: return true;
0715: }
0716:
0717: private boolean isValidIP4Word(String word) {
0718: char c;
0719: if (word.length() < 1 || word.length() > 3) {
0720: return false;
0721: }
0722: for (int i = 0; i < word.length(); i++) {
0723: c = word.charAt(i);
0724: if (!(c >= '0' && c <= '9')) {
0725: return false;
0726: }
0727: }
0728: if (Integer.parseInt(word) > 255) {
0729: return false;
0730: }
0731: return true;
0732: }
0733:
0734: private boolean isValidHexChar(char c) {
0735:
0736: return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F')
0737: || (c >= 'a' && c <= 'f');
0738: }
0739: }
0740:
0741: /*
0742: * Quote illegal chars for each component, but not the others
0743: *
0744: * @param component java.lang.String the component to be converted @param
0745: * legalset java.lang.String the legal character set allowed in the
0746: * component s @return java.lang.String the converted string
0747: */
0748: private String quoteComponent(String component, String legalset) {
0749: try {
0750: /*
0751: * Use a different encoder than URLEncoder since: 1. chars like "/",
0752: * "#", "@" etc needs to be preserved instead of being encoded, 2.
0753: * UTF-8 char set needs to be used for encoding instead of default
0754: * platform one
0755: */
0756: return URIEncoderDecoder.quoteIllegal(component, legalset);
0757: } catch (UnsupportedEncodingException e) {
0758: throw new RuntimeException(e.toString());
0759: }
0760: }
0761:
0762: public int compareTo(URI uri) {
0763: int ret = 0;
0764:
0765: // compare schemes
0766: if (scheme == null && uri.scheme != null) {
0767: return -1;
0768: } else if (scheme != null && uri.scheme == null) {
0769: return 1;
0770: } else if (scheme != null && uri.scheme != null) {
0771: ret = scheme.compareToIgnoreCase(uri.scheme);
0772: if (ret != 0) {
0773: return ret;
0774: }
0775: }
0776:
0777: // compare opacities
0778: if (!opaque && uri.opaque) {
0779: return -1;
0780: } else if (opaque && !uri.opaque) {
0781: return 1;
0782: } else if (opaque && uri.opaque) {
0783: ret = schemespecificpart.compareTo(uri.schemespecificpart);
0784: if (ret != 0) {
0785: return ret;
0786: }
0787: } else {
0788:
0789: // otherwise both must be hierarchical
0790:
0791: // compare authorities
0792: if (authority != null && uri.authority == null) {
0793: return 1;
0794: } else if (authority == null && uri.authority != null) {
0795: return -1;
0796: } else if (authority != null && uri.authority != null) {
0797: if (host != null && uri.host != null) {
0798: // both are server based, so compare userinfo, host, port
0799: if (userinfo != null && uri.userinfo == null) {
0800: return 1;
0801: } else if (userinfo == null && uri.userinfo != null) {
0802: return -1;
0803: } else if (userinfo != null && uri.userinfo != null) {
0804: ret = userinfo.compareTo(uri.userinfo);
0805: if (ret != 0) {
0806: return ret;
0807: }
0808: }
0809:
0810: // userinfo's are the same, compare hostname
0811: ret = host.compareToIgnoreCase(uri.host);
0812: if (ret != 0) {
0813: return ret;
0814: }
0815:
0816: // compare port
0817: if (port != uri.port) {
0818: return port - uri.port;
0819: }
0820: } else { // one or both are registry based, compare the whole
0821: // authority
0822: ret = authority.compareTo(uri.authority);
0823: if (ret != 0) {
0824: return ret;
0825: }
0826: }
0827: }
0828:
0829: // authorities are the same
0830: // compare paths
0831: ret = path.compareTo(uri.path);
0832: if (ret != 0) {
0833: return ret;
0834: }
0835:
0836: // compare queries
0837:
0838: if (query != null && uri.query == null) {
0839: return 1;
0840: } else if (query == null && uri.query != null) {
0841: return -1;
0842: } else if (query != null && uri.query != null) {
0843: ret = query.compareTo(uri.query);
0844: if (ret != 0) {
0845: return ret;
0846: }
0847: }
0848: }
0849:
0850: // everything else is identical, so compare fragments
0851: if (fragment != null && uri.fragment == null) {
0852: return 1;
0853: } else if (fragment == null && uri.fragment != null) {
0854: return -1;
0855: } else if (fragment != null && uri.fragment != null) {
0856: ret = fragment.compareTo(uri.fragment);
0857: if (ret != 0) {
0858: return ret;
0859: }
0860: }
0861:
0862: // identical
0863: return 0;
0864: }
0865:
0866: public static URI create(String uri) {
0867: URI result = null;
0868: try {
0869: result = new URI(uri);
0870: } catch (URISyntaxException e) {
0871: throw new IllegalArgumentException(e.getMessage());
0872: }
0873: return result;
0874: }
0875:
0876: private URI duplicate() {
0877: URI clone = new URI();
0878: clone.absolute = absolute;
0879: clone.authority = authority;
0880: clone.fragment = fragment;
0881: clone.host = host;
0882: clone.opaque = opaque;
0883: clone.path = path;
0884: clone.port = port;
0885: clone.query = query;
0886: clone.scheme = scheme;
0887: clone.schemespecificpart = schemespecificpart;
0888: clone.userinfo = userinfo;
0889: clone.serverAuthority = serverAuthority;
0890: return clone;
0891: }
0892:
0893: /*
0894: * Takes a string that may contain hex sequences like %F1 or %2b and
0895: * converts the hex values following the '%' to lowercase
0896: */
0897: private String convertHexToLowerCase(String s) {
0898: StringBuffer result = new StringBuffer(""); //$NON-NLS-1$
0899: if (s.indexOf('%') == -1) {
0900: return s;
0901: }
0902:
0903: int index = 0, previndex = 0;
0904: while ((index = s.indexOf('%', previndex)) != -1) {
0905: result.append(s.substring(previndex, index + 1));
0906: result.append(s.substring(index + 1, index + 3)
0907: .toLowerCase());
0908: index += 3;
0909: previndex = index;
0910: }
0911: return result.toString();
0912: }
0913:
0914: /*
0915: * Takes two strings that may contain hex sequences like %F1 or %2b and
0916: * compares them, ignoring case for the hex values hex values must always
0917: * occur in pairs like above
0918: */
0919: private boolean equalsHexCaseInsensitive(String first, String second) {
0920: if (first.indexOf('%') != second.indexOf('%')) {
0921: return first.equals(second);
0922: }
0923:
0924: int index = 0, previndex = 0;
0925: while ((index = first.indexOf('%', previndex)) != -1
0926: && second.indexOf('%', previndex) == index) {
0927: boolean match = first.substring(previndex, index).equals(
0928: second.substring(previndex, index));
0929: if (!match) {
0930: return false;
0931: }
0932:
0933: match = first.substring(index + 1, index + 3)
0934: .equalsIgnoreCase(
0935: second.substring(index + 1, index + 3));
0936: if (!match) {
0937: return false;
0938: }
0939:
0940: index += 3;
0941: previndex = index;
0942: }
0943: return first.substring(previndex).equals(
0944: second.substring(previndex));
0945: }
0946:
0947: @Override
0948: public boolean equals(Object o) {
0949: if (!(o instanceof URI)) {
0950: return false;
0951: }
0952: URI uri = (URI) o;
0953:
0954: if (uri.fragment == null && fragment != null
0955: || uri.fragment != null && fragment == null) {
0956: return false;
0957: } else if (uri.fragment != null && fragment != null) {
0958: if (!equalsHexCaseInsensitive(uri.fragment, fragment)) {
0959: return false;
0960: }
0961: }
0962:
0963: if (uri.scheme == null && scheme != null || uri.scheme != null
0964: && scheme == null) {
0965: return false;
0966: } else if (uri.scheme != null && scheme != null) {
0967: if (!uri.scheme.equalsIgnoreCase(scheme)) {
0968: return false;
0969: }
0970: }
0971:
0972: if (uri.opaque && opaque) {
0973: return equalsHexCaseInsensitive(uri.schemespecificpart,
0974: schemespecificpart);
0975: } else if (!uri.opaque && !opaque) {
0976: if (!equalsHexCaseInsensitive(path, uri.path)) {
0977: return false;
0978: }
0979:
0980: if (uri.query != null && query == null || uri.query == null
0981: && query != null) {
0982: return false;
0983: } else if (uri.query != null && query != null) {
0984: if (!equalsHexCaseInsensitive(uri.query, query)) {
0985: return false;
0986: }
0987: }
0988:
0989: if (uri.authority != null && authority == null
0990: || uri.authority == null && authority != null) {
0991: return false;
0992: } else if (uri.authority != null && authority != null) {
0993: if (uri.host != null && host == null
0994: || uri.host == null && host != null) {
0995: return false;
0996: } else if (uri.host == null && host == null) {
0997: // both are registry based, so compare the whole authority
0998: return equalsHexCaseInsensitive(uri.authority,
0999: authority);
1000: } else { // uri.host != null && host != null, so server-based
1001: if (!host.equalsIgnoreCase(uri.host)) {
1002: return false;
1003: }
1004:
1005: if (port != uri.port) {
1006: return false;
1007: }
1008:
1009: if (uri.userinfo != null && userinfo == null
1010: || uri.userinfo == null && userinfo != null) {
1011: return false;
1012: } else if (uri.userinfo != null && userinfo != null) {
1013: return equalsHexCaseInsensitive(userinfo,
1014: uri.userinfo);
1015: } else {
1016: return true;
1017: }
1018: }
1019: } else {
1020: // no authority
1021: return true;
1022: }
1023:
1024: } else {
1025: // one is opaque, the other hierarchical
1026: return false;
1027: }
1028: }
1029:
1030: public String getAuthority() {
1031: return decode(authority);
1032: }
1033:
1034: /**
1035: * Returns the fragment component.
1036: *
1037: * @return String
1038: */
1039: public String getFragment() {
1040: return decode(fragment);
1041: }
1042:
1043: /**
1044: * Returns the host component.
1045: *
1046: * @return String
1047: */
1048: public String getHost() {
1049: return host;
1050: }
1051:
1052: /**
1053: * Returns the path component.
1054: *
1055: * @return String
1056: */
1057: public String getPath() {
1058: return decode(path);
1059: }
1060:
1061: /**
1062: * Returns the port number.
1063: *
1064: * @return int
1065: */
1066: public int getPort() {
1067: return port;
1068: }
1069:
1070: /**
1071: * Returns the query component.
1072: *
1073: * @return String
1074: */
1075: public String getQuery() {
1076: return decode(query);
1077: }
1078:
1079: /**
1080: * Returns the authority component in raw form.
1081: *
1082: * @return String
1083: */
1084: public String getRawAuthority() {
1085: return authority;
1086: }
1087:
1088: /**
1089: * Returns the fragment component in raw form.
1090: *
1091: * @return String
1092: */
1093: public String getRawFragment() {
1094: return fragment;
1095: }
1096:
1097: /**
1098: * Returns the path component in raw form.
1099: *
1100: * @return String
1101: */
1102: public String getRawPath() {
1103: return path;
1104: }
1105:
1106: /**
1107: * Returns the query component in raw form.
1108: *
1109: * @return String
1110: */
1111: public String getRawQuery() {
1112: return query;
1113: }
1114:
1115: /**
1116: * Returns the scheme-specific part component in raw form.
1117: *
1118: * @return String
1119: */
1120: public String getRawSchemeSpecificPart() {
1121: return schemespecificpart;
1122: }
1123:
1124: /**
1125: * Returns the user-info component in raw form.
1126: *
1127: * @return String
1128: */
1129: public String getRawUserInfo() {
1130: return userinfo;
1131: }
1132:
/**
 * Returns the scheme exactly as stored.
 *
 * @return the value of the scheme field; {@code null} means no scheme
 *         ({@code toString()} then omits the "scheme:" prefix)
 */
public String getScheme() {
    return scheme;
}
1141:
/**
 * Returns the scheme-specific part after passing it through
 * {@code decode(String)}.
 *
 * @return the decoded scheme-specific part, or {@code null} if the
 *         stored field is {@code null}
 */
public String getSchemeSpecificPart() {
    return decode(schemespecificpart);
}
1150:
/**
 * Returns the user-info component after passing it through
 * {@code decode(String)}.
 *
 * @return the decoded user-info, or {@code null} if the user-info field
 *         is {@code null}
 */
public String getUserInfo() {
    return decode(userinfo);
}
1159:
1160: @Override
1161: public int hashCode() {
1162: if (hash == -1) {
1163: hash = getHashString().hashCode();
1164: }
1165: return hash;
1166: }
1167:
/**
 * Indicates whether this URI is absolute.
 *
 * @return the value of the {@code absolute} flag; {@code toURL()} rejects
 *         URIs for which this is {@code false}
 */
public boolean isAbsolute() {
    return absolute;
}
1176:
/**
 * Indicates whether this URI is opaque. For an opaque URI,
 * {@code toString()} emits the scheme-specific part instead of the
 * authority, path and query components.
 *
 * @return true if the URI is opaque, otherwise false
 */
public boolean isOpaque() {
    return opaque;
}
1185:
1186: /*
1187: * normalize path, and return the resulting string
1188: */
1189: private String normalize(String path) {
1190: // count the number of '/'s, to determine number of segments
1191: int index = -1;
1192: int pathlen = path.length();
1193: int size = 0;
1194: if (pathlen > 0 && path.charAt(0) != '/') {
1195: size++;
1196: }
1197: while ((index = path.indexOf('/', index + 1)) != -1) {
1198: if (index + 1 < pathlen && path.charAt(index + 1) != '/') {
1199: size++;
1200: }
1201: }
1202:
1203: String[] seglist = new String[size];
1204: boolean[] include = new boolean[size];
1205:
1206: // break the path into segments and store in the list
1207: int current = 0;
1208: int index2 = 0;
1209: index = (pathlen > 0 && path.charAt(0) == '/') ? 1 : 0;
1210: while ((index2 = path.indexOf('/', index + 1)) != -1) {
1211: seglist[current++] = path.substring(index, index2);
1212: index = index2 + 1;
1213: }
1214:
1215: // if current==size, then the last character was a slash
1216: // and there are no more segments
1217: if (current < size) {
1218: seglist[current] = path.substring(index);
1219: }
1220:
1221: // determine which segments get included in the normalized path
1222: for (int i = 0; i < size; i++) {
1223: include[i] = true;
1224: if (seglist[i].equals("..")) { //$NON-NLS-1$
1225: int remove = i - 1;
1226: // search back to find a segment to remove, if possible
1227: while (remove > -1 && !include[remove]) {
1228: remove--;
1229: }
1230: // if we find a segment to remove, remove it and the ".."
1231: // segment
1232: if (remove > -1 && !seglist[remove].equals("..")) { //$NON-NLS-1$
1233: include[remove] = false;
1234: include[i] = false;
1235: }
1236: } else if (seglist[i].equals(".")) { //$NON-NLS-1$
1237: include[i] = false;
1238: }
1239: }
1240:
1241: // put the path back together
1242: StringBuffer newpath = new StringBuffer();
1243: if (path.startsWith("/")) { //$NON-NLS-1$
1244: newpath.append('/');
1245: }
1246:
1247: for (int i = 0; i < seglist.length; i++) {
1248: if (include[i]) {
1249: newpath.append(seglist[i]);
1250: newpath.append('/');
1251: }
1252: }
1253:
1254: // if we used at least one segment and the path previously ended with
1255: // a slash and the last segment is still used, then delete the extra
1256: // trailing '/'
1257: if (!path.endsWith("/") && seglist.length > 0 //$NON-NLS-1$
1258: && include[seglist.length - 1]) {
1259: newpath.deleteCharAt(newpath.length() - 1);
1260: }
1261:
1262: String result = newpath.toString();
1263:
1264: // check for a ':' in the first segment if one exists,
1265: // prepend "./" to normalize
1266: index = result.indexOf(':');
1267: index2 = result.indexOf('/');
1268: if (index != -1 && (index < index2 || index2 == -1)) {
1269: newpath.insert(0, "./"); //$NON-NLS-1$
1270: result = newpath.toString();
1271: }
1272: return result;
1273: }
1274:
1275: public URI normalize() {
1276: if (opaque) {
1277: return this ;
1278: }
1279: String normalizedPath = normalize(path);
1280: // if the path is already normalized, return this
1281: if (path.equals(normalizedPath)) {
1282: return this ;
1283: }
1284: // get an exact copy of the URI re-calculate the scheme specific part
1285: // since the path of the normalized URI is different from this URI.
1286: URI result = duplicate();
1287: result.path = normalizedPath;
1288: result.setSchemeSpecificPart();
1289: return result;
1290: }
1291:
1292: /**
1293: * Return this uri instance if it has already been determined as a
1294: * ServerAuthority Otherwise try to parse it again as a server authority to
1295: * produce a URISyntaxException with the proper diagnostic message.
1296: */
1297: public URI parseServerAuthority() throws URISyntaxException {
1298: if (!serverAuthority) {
1299: new Helper().parseAuthority(true);
1300: }
1301: return this ;
1302: }
1303:
1304: public URI relativize(URI relative) {
1305: if (relative.opaque || opaque) {
1306: return relative;
1307: }
1308:
1309: if (scheme == null ? relative.scheme != null : !scheme
1310: .equals(relative.scheme)) {
1311: return relative;
1312: }
1313:
1314: if (authority == null ? relative.authority != null : !authority
1315: .equals(relative.authority)) {
1316: return relative;
1317: }
1318:
1319: // normalize both paths
1320: String this Path = normalize(path);
1321: String relativePath = normalize(relative.path);
1322:
1323: /*
1324: * if the paths aren't equal, then we need to determine if this URI's
1325: * path is a parent path (begins with) the relative URI's path
1326: */
1327: if (!this Path.equals(relativePath)) {
1328: // if this URI's path doesn't end in a '/', add one
1329: if (!this Path.endsWith("/")) { //$NON-NLS-1$
1330: this Path = this Path + '/';
1331: }
1332: /*
1333: * if the relative URI's path doesn't start with this URI's path,
1334: * then just return the relative URI; the URIs have nothing in
1335: * common
1336: */
1337: if (!relativePath.startsWith(this Path)) {
1338: return relative;
1339: }
1340: }
1341:
1342: URI result = new URI();
1343: result.fragment = relative.fragment;
1344: result.query = relative.query;
1345: // the result URI is the remainder of the relative URI's path
1346: result.path = relativePath.substring(this Path.length());
1347: return result;
1348: }
1349:
1350: public URI resolve(URI relative) {
1351: if (relative.absolute || opaque) {
1352: return relative;
1353: }
1354:
1355: URI result;
1356: if (relative.path.equals("") && relative.scheme == null //$NON-NLS-1$
1357: && relative.authority == null && relative.query == null
1358: && relative.fragment != null) {
1359: // if the relative URI only consists of fragment,
1360: // the resolved URI is very similar to this URI,
1361: // except that it has the fragement from the relative URI.
1362: result = duplicate();
1363: result.fragment = relative.fragment;
1364: // no need to re-calculate the scheme specific part,
1365: // since fragment is not part of scheme specific part.
1366: return result;
1367: }
1368:
1369: if (relative.authority != null) {
1370: // if the relative URI has authority,
1371: // the resolved URI is almost the same as the relative URI,
1372: // except that it has the scheme of this URI.
1373: result = relative.duplicate();
1374: result.scheme = scheme;
1375: result.absolute = absolute;
1376: } else {
1377: // since relative URI has no authority,
1378: // the resolved URI is very similar to this URI,
1379: // except that it has the query and fragment of the relative URI,
1380: // and the path is different.
1381: result = duplicate();
1382: result.fragment = relative.fragment;
1383: result.query = relative.query;
1384: if (relative.path.startsWith("/")) { //$NON-NLS-1$
1385: result.path = relative.path;
1386: } else {
1387: // resolve a relative reference
1388: int endindex = path.lastIndexOf('/') + 1;
1389: result.path = normalize(path.substring(0, endindex)
1390: + relative.path);
1391: }
1392: // re-calculate the scheme specific part since
1393: // query and path of the resolved URI is different from this URI.
1394: result.setSchemeSpecificPart();
1395: }
1396: return result;
1397: }
1398:
1399: /**
1400: * Helper method used to re-calculate the scheme specific part of the
1401: * resolved or normalized URIs
1402: */
1403: private void setSchemeSpecificPart() {
1404: // ssp = [//authority][path][?query]
1405: StringBuffer ssp = new StringBuffer();
1406: if (authority != null) {
1407: ssp.append("//" + authority); //$NON-NLS-1$
1408: }
1409: if (path != null) {
1410: ssp.append(path);
1411: }
1412: if (query != null) {
1413: ssp.append("?" + query); //$NON-NLS-1$
1414: }
1415: schemespecificpart = ssp.toString();
1416: // reset string, so that it can be re-calculated correctly when asked.
1417: string = null;
1418: }
1419:
/**
 * Parses the given string with {@code create(String)} and resolves the
 * resulting URI against this URI via {@code resolve(URI)}.
 *
 * @param relative
 *            the string form of the URI to resolve against this URI
 * @return the resolved URI
 */
public URI resolve(String relative) {
    return resolve(create(relative));
}
1423:
1424: /*
1425: * Encode unicode chars that are not part of US-ASCII char set into the
1426: * escaped form
1427: *
1428: * i.e. The Euro currency symbol is encoded as "%E2%82%AC".
1429: *
1430: * @param component java.lang.String the component to be converted @param
1431: * legalset java.lang.String the legal character set allowed in the
1432: * component s @return java.lang.String the converted string
1433: */
1434: private String encodeOthers(String s) {
1435: try {
1436: /*
1437: * Use a different encoder than URLEncoder since: 1. chars like "/",
1438: * "#", "@" etc needs to be preserved instead of being encoded, 2.
1439: * UTF-8 char set needs to be used for encoding instead of default
1440: * platform one 3. Only other chars need to be converted
1441: */
1442: return URIEncoderDecoder.encodeOthers(s);
1443: } catch (UnsupportedEncodingException e) {
1444: throw new RuntimeException(e.toString());
1445: }
1446: }
1447:
1448: private String decode(String s) {
1449: if (s == null) {
1450: return s;
1451: }
1452:
1453: try {
1454: return URIEncoderDecoder.decode(s);
1455: } catch (UnsupportedEncodingException e) {
1456: throw new RuntimeException(e.toString());
1457: }
1458: }
1459:
/**
 * Returns the string form of this URI, as produced by
 * {@code toString()}, passed through {@code encodeOthers(String)}, which
 * escapes characters outside the US-ASCII set.
 *
 * @return the encoded string form of this URI
 */
public String toASCIIString() {
    return encodeOthers(toString());
}
1463:
1464: @Override
1465: public String toString() {
1466: if (string == null) {
1467: StringBuffer result = new StringBuffer();
1468: if (scheme != null) {
1469: result.append(scheme);
1470: result.append(':');
1471: }
1472: if (opaque) {
1473: result.append(schemespecificpart);
1474: } else {
1475: if (authority != null) {
1476: result.append("//"); //$NON-NLS-1$
1477: result.append(authority);
1478: }
1479:
1480: if (path != null) {
1481: result.append(path);
1482: }
1483:
1484: if (query != null) {
1485: result.append('?');
1486: result.append(query);
1487: }
1488: }
1489:
1490: if (fragment != null) {
1491: result.append('#');
1492: result.append(fragment);
1493: }
1494:
1495: string = result.toString();
1496: }
1497: return string;
1498: }
1499:
1500: /*
1501: * Form a string from the components of this URI, similarly to the
1502: * toString() method. But this method converts scheme and host to lowercase,
1503: * and converts escaped octets to lowercase.
1504: */
1505: private String getHashString() {
1506: StringBuffer result = new StringBuffer();
1507: if (scheme != null) {
1508: result.append(scheme.toLowerCase());
1509: result.append(':');
1510: }
1511: if (opaque) {
1512: result.append(schemespecificpart);
1513: } else {
1514: if (authority != null) {
1515: result.append("//"); //$NON-NLS-1$
1516: if (host == null) {
1517: result.append(authority);
1518: } else {
1519: if (userinfo != null) {
1520: result.append(userinfo + "@"); //$NON-NLS-1$
1521: }
1522: result.append(host.toLowerCase());
1523: if (port != -1) {
1524: result.append(":" + port); //$NON-NLS-1$
1525: }
1526: }
1527: }
1528:
1529: if (path != null) {
1530: result.append(path);
1531: }
1532:
1533: if (query != null) {
1534: result.append('?');
1535: result.append(query);
1536: }
1537: }
1538:
1539: if (fragment != null) {
1540: result.append('#');
1541: result.append(fragment);
1542: }
1543:
1544: return convertHexToLowerCase(result.toString());
1545: }
1546:
1547: public URL toURL() throws MalformedURLException {
1548: if (!absolute) {
1549: throw new IllegalArgumentException(
1550: Msg.getString("K0312") + ": " //$NON-NLS-1$//$NON-NLS-2$
1551: + toString());
1552: }
1553: return new URL(toString());
1554: }
1555:
1556: private void readObject(ObjectInputStream in) throws IOException,
1557: ClassNotFoundException {
1558: in.defaultReadObject();
1559: try {
1560: new Helper().parseURI(string, false);
1561: } catch (URISyntaxException e) {
1562: throw new IOException(e.toString());
1563: }
1564: }
1565:
/*
 * Serialization hook: makes sure the cached string form exists and then
 * writes the default serialized state. The component fields visible at
 * the top of the class are declared transient, so readObject() re-parses
 * the string to restore them.
 *
 * NOTE(review): nothing in this body appears to throw
 * ClassNotFoundException; confirm whether the declaration is needed.
 */
private void writeObject(ObjectOutputStream out)
        throws IOException, ClassNotFoundException {
    // call toString() to ensure the value of string field is calculated
    toString();
    out.defaultWriteObject();
}
1572: }
|