0001: /*
0002: * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
0003: * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
0004: */
0005:
0006: package com.sun.portal.rewriter.util.uri;
0007:
0008: import java.io.Serializable;
0009:
/**
 * A URI reference held as its individual components, with equality,
 * ordering, normalization and resolution helpers. All component fields are
 * transient; only the non-transient fields at the end of this list take part
 * in default serialization.
 */
0010: public final class URI implements Comparable, Serializable {
0011: private transient String scheme; // null ==> relative URI
0012: private transient String fragment; // null ==> no fragment is appended to the string form
0013: private transient String authority; // Registry or server
0014: private transient String userInfo; // null ==> no "userinfo@" prefix is written
0015: private transient String host; // null ==> registry-based
0016: private transient int port = -1; // -1 ==> undefined
0017: private transient String path; // null ==> opaque
0018: private transient String query; // null ==> no "?query" is written
0019: private volatile transient String schemeSpecificPart; // compared and hashed only for opaque URIs
0020: private volatile transient int hash; // Zero ==> undefined
0021: private int length = 16; // initial StringBuffer capacity for toString; set to the input length by the public constructor
0022: private volatile String string; // cached string form; non-transient (note that length is non-transient as well)
0023:
/**
 * Creates a URI with every component left at its declared default.
 * Used by the static resolve/normalize helpers, which fill in the
 * components themselves.
 */
private URI() {
    // Nothing to initialize here.
}//constructor
0026:
/**
 * Builds a URI by parsing the given string.
 *
 * @param str the text to parse; its length is remembered as the initial
 *            buffer capacity used when the string form is rebuilt
 * @throws URISyntaxException if the parser rejects the input
 */
public URI(final String str) throws URISyntaxException {
    length = str.length();
    new Parser(str).parse(false);
}//constructor
0032:
0033: public URI parseServerAuthority() throws URISyntaxException {
0034: if ((host != null) || (authority == null)) {
0035: return this ;
0036: }
0037:
0038: defineString();
0039: new Parser(string).parse(true);
0040: return this ;
0041: }//parseServerAuthority()
0042:
0043: public URI normalize() {
0044: return normalize(this );
0045: }//normalize()
0046:
0047: public URI resolve(final URI uri) {
0048: return resolve(this , uri);
0049: }//resolve()
0050:
0051: public String getScheme() {
0052: return scheme;
0053: }//getScheme()
0054:
0055: public boolean isAbsolute() {
0056: return scheme != null;
0057: }//isAbsolute()
0058:
0059: public boolean isOpaque() {
0060: return path == null;
0061: }//isOpaque()
0062:
0063: public String getAuthority() {
0064: return authority;
0065: }//getAuthority()
0066:
0067: public String getUserInfo() {
0068: return userInfo;
0069: }//getUserInfo()
0070:
0071: public String getHost() {
0072: return host;
0073: }//getHost()
0074:
0075: public int getPort() {
0076: return port;
0077: }//getPort()
0078:
0079: public String getPath() {
0080: return path;
0081: }//getPath()
0082:
0083: public String getQuery() {
0084: return query;
0085: }//getQuery()
0086:
0087: public String getFragment() {
0088: return fragment;
0089: }//getFragment()
0090:
0091: public boolean equals(final Object ob) {
0092: if (ob == this ) {
0093: return true;
0094: }
0095: if (!(ob instanceof URI)) {
0096: return false;
0097: }
0098: URI that = (URI) ob;
0099: if (this .isOpaque() != that.isOpaque()) {
0100: return false;
0101: }
0102:
0103: if (!equalIgnoringCase(this .scheme, that.scheme)) {
0104: return false;
0105: }
0106:
0107: if (!equal(this .fragment, that.fragment)) {
0108: return false;
0109: }
0110:
0111: // Opaque
0112: if (this .isOpaque()) {
0113: return equal(this .schemeSpecificPart,
0114: that.schemeSpecificPart);
0115: }
0116:
0117: // Hierarchical
0118: if (!equal(this .path, that.path)) {
0119: return false;
0120: }
0121:
0122: if (!equal(this .query, that.query)) {
0123: return false;
0124: }
0125:
0126: // Authorities
0127: if (this .authority == that.authority) {
0128: return true;
0129: }
0130:
0131: if (this .host != null) {
0132: // Server-based
0133: if (!equal(this .userInfo, that.userInfo)) {
0134: return false;
0135: }
0136:
0137: if (!equalIgnoringCase(this .host, that.host)) {
0138: return false;
0139: }
0140:
0141: if (this .port != that.port) {
0142: return false;
0143: }
0144: } else if (this .authority != null) {
0145: // Registry-based
0146: if (!equal(this .authority, that.authority)) {
0147: return false;
0148: }
0149: } else if (this .authority != that.authority) {
0150: return false;
0151: }
0152:
0153: return true;
0154: }//equals()
0155:
0156: public int hashCode() {
0157: if (hash != 0) {
0158: return hash;
0159: }
0160:
0161: int h = hashIgnoringCase(0, scheme);
0162: h = hash(h, fragment);
0163: if (isOpaque()) {
0164: h = hash(h, schemeSpecificPart);
0165: } else {
0166: h = hash(h, path);
0167: h = hash(h, query);
0168: if (host != null) {
0169: h = hash(h, userInfo);
0170: h = hashIgnoringCase(h, host);
0171: h += 1949 * port;
0172: } else {
0173: h = hash(h, authority);
0174: }
0175: }
0176: hash = h;
0177: return h;
0178: }//hashCode()
0179:
0180: public int compareTo(final Object ob) {
0181: final URI that = (URI) ob;
0182: int c;
0183:
0184: if ((c = compareIgnoringCase(this .scheme, that.scheme)) != 0)
0185: return c;
0186:
0187: if (this .isOpaque()) {
0188: if (that.isOpaque()) {
0189: // Both opaque
0190: if ((c = compare(this .schemeSpecificPart,
0191: that.schemeSpecificPart)) != 0)
0192: return c;
0193: return compare(this .fragment, that.fragment);
0194: }
0195: return +1; // Opaque > hierarchical
0196: } else if (that.isOpaque()) {
0197: return -1; // Hierarchical < opaque
0198: }
0199:
0200: // Hierarchical
0201: if ((this .host != null) && (that.host != null)) {
0202: // Both server-based
0203: if ((c = compare(this .userInfo, that.userInfo)) != 0)
0204: return c;
0205: if ((c = compareIgnoringCase(this .host, that.host)) != 0)
0206: return c;
0207: if ((c = this .port - that.port) != 0)
0208: return c;
0209: } else {
0210: // If one or both authorities are registry-based then we simply
0211: // compare them in the usual, case-sensitive way. If one is
0212: // registry-based and one is server-based then the strings are
0213: // guaranteed to be unequal, hence the comparison will never return
0214: // zero and the compareTo and equals methods will remain
0215: // consistent.
0216: if ((c = compare(this .authority, that.authority)) != 0)
0217: return c;
0218: }
0219:
0220: if ((c = compare(this .path, that.path)) != 0) {
0221: return c;
0222: }
0223:
0224: if ((c = compare(this .query, that.query)) != 0) {
0225: return c;
0226: }
0227:
0228: return compare(this .fragment, that.fragment);
0229: }//compareTo()
0230:
0231: public String toString() {
0232: defineString();
0233: return string;
0234: }
0235:
0236: private static int toLower(final char c) {
0237: if ((c >= 'A') && (c <= 'Z')) {
0238: return c + ('a' - 'A');
0239: }
0240:
0241: return c;
0242: }//toLower()
0243:
0244: private static boolean equal(final String s, final String t) {
0245: if (s == t) {
0246: return true;
0247: }
0248:
0249: if ((s != null) && (t != null)) {
0250: if (s.length() != t.length()) {
0251: return false;
0252: }
0253:
0254: if (s.indexOf('%') < 0) {
0255: return s.equals(t);
0256: }
0257:
0258: int n = s.length();
0259: for (int i = 0; i < n;) {
0260: char c = s.charAt(i);
0261: char d = t.charAt(i);
0262: if (c != '%') {
0263: if (c != d) {
0264: return false;
0265: }
0266:
0267: i++;
0268: continue;
0269: }
0270: i++;
0271: if (toLower(s.charAt(i)) != toLower(t.charAt(i))) {
0272: return false;
0273: }
0274:
0275: i++;
0276: if (toLower(s.charAt(i)) != toLower(t.charAt(i))) {
0277: return false;
0278: }
0279:
0280: i++;
0281: }
0282: return true;
0283: }
0284: return false;
0285: }//equal()
0286:
0287: // US-ASCII only
0288: private static boolean equalIgnoringCase(final String s,
0289: final String t) {
0290: if (s == t) {
0291: return true;
0292: }
0293:
0294: if ((s != null) && (t != null)) {
0295: int n = s.length();
0296: if (t.length() != n) {
0297: return false;
0298: }
0299:
0300: for (int i = 0; i < n; i++) {
0301: if (toLower(s.charAt(i)) != toLower(t.charAt(i))) {
0302: return false;
0303: }
0304: }
0305: return true;
0306: }
0307: return false;
0308: }//equalIgnoringCase()
0309:
0310: private static int hash(final int hash, final String s) {
0311: if (s == null) {
0312: return hash;
0313: }
0314:
0315: return hash * 127 + s.hashCode();
0316: }//hash()
0317:
0318: // US-ASCII only
0319: private static int hashIgnoringCase(final int hash, final String s) {
0320: if (s == null) {
0321: return hash;
0322: }
0323:
0324: int h = hash;
0325: int n = s.length();
0326: for (int i = 0; i < n; i++) {
0327: h = 31 * h + toLower(s.charAt(i));
0328: }
0329:
0330: return h;
0331: }//hashIgnoringCase()
0332:
0333: private static int compare(final String s, final String t) {
0334: if (s == t) {
0335: return 0;
0336: }
0337:
0338: if (s != null) {
0339: if (t != null) {
0340: return s.compareTo(t);
0341: }
0342:
0343: else {
0344: return -1;
0345: }
0346: } else {
0347: return +1;
0348: }
0349: }//compare()
0350:
0351: // US-ASCII only
0352: private static int compareIgnoringCase(final String s,
0353: final String t) {
0354: if (s == t)
0355: return 0;
0356: if (s != null) {
0357: if (t != null) {
0358: int sn = s.length();
0359: int tn = t.length();
0360: int n = sn < tn ? sn : tn;
0361: for (int i = 0; i < n; i++) {
0362: int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
0363: if (c != 0)
0364: return c;
0365: }
0366: return sn - tn;
0367: }
0368: return +1;
0369: } else {
0370: return -1;
0371: }
0372: }
0373:
0374: private static void appendAuthority(final StringBuffer sb,
0375: final String authority, final String userInfo,
0376: final String host, final int port) {
0377: if (host != null) {
0378: sb.append("//");
0379: if (userInfo != null) {
0380: sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
0381: sb.append('@');
0382: }
0383: boolean needBrackets = ((host.indexOf(':') >= 0)
0384: && !host.startsWith("[") && !host.endsWith("]"));
0385: if (needBrackets)
0386: sb.append('[');
0387: sb.append(host);
0388: if (needBrackets)
0389: sb.append(']');
0390: if (port != -1) {
0391: sb.append(':');
0392: sb.append(port);
0393: }
0394: } else if (authority != null) {
0395: sb.append("//");
0396: sb.append(quote(authority, L_REG_NAME | L_SERVER,
0397: H_REG_NAME | H_SERVER));
0398: }
0399: }
0400:
0401: private void appendSchemeSpecificPart(final StringBuffer sb,
0402: final String opaquePart, final String authority,
0403: final String userInfo, final String host, final int port,
0404: final String path, final String query) {
0405: if (opaquePart != null) {
0406: sb.append(quote(opaquePart, L_URIC, H_URIC));
0407: } else {
0408: appendAuthority(sb, authority, userInfo, host, port);
0409: if (path != null) {
0410: sb.append(quote(path, L_PATH, H_PATH));
0411: }
0412:
0413: if (query != null) {
0414: sb.append('?');
0415: sb.append(quote(query, L_URIC, H_URIC));
0416: }
0417: }
0418: }
0419:
0420: private static void appendFragment(final StringBuffer sb,
0421: final String fragment) {
0422: if (fragment != null) {
0423: sb.append('#');
0424: sb.append(quote(fragment, L_URIC, H_URIC));
0425: }
0426: }
0427:
0428: private String toString(final String scheme,
0429: final String opaquePart, final String authority,
0430: final String userInfo, final String host, final int port,
0431: final String path, final String query, final String fragment) {
0432: final StringBuffer sb = new StringBuffer(length);
0433: if (scheme != null) {
0434: sb.append(scheme);
0435: sb.append(':');
0436: }
0437: appendSchemeSpecificPart(sb, opaquePart, authority, userInfo,
0438: host, port, path, query);
0439: appendFragment(sb, fragment);
0440: return sb.toString();
0441: }
0442:
0443: private void defineString() {
0444: if (string != null)
0445: return;
0446: string = toString(scheme, isOpaque() ? schemeSpecificPart
0447: : null, authority, userInfo, host, port, path, query,
0448: fragment);
0449: }
0450:
0451: // -- Normalization, resolution, and relativization --
0452: // RFC2396 5.2 (6)
0453: private static String resolvePath(final String base,
0454: final String child, final boolean absolute) {
0455: final int i = base.lastIndexOf('/');
0456: final int cn = child.length();
0457: String path = "";
0458:
0459: if (cn == 0) {
0460: // 5.2 (6a)
0461: if (i >= 0) {
0462: path = base.substring(0, i + 1);
0463: }
0464: } else {
0465: StringBuffer sb = new StringBuffer(child.length() + i + 1);
0466: // 5.2 (6a)
0467: if (i >= 0) {
0468: sb.append(base.substring(0, i + 1));
0469: }
0470: // 5.2 (6b)
0471: sb.append(child);
0472: path = sb.toString();
0473: }
0474:
0475: // 5.2 (6c-f)
0476: final String np = normalize(path);
0477:
0478: // 5.2 (6g): If the result is absolute but the path begins with "../",
0479: // then we simply leave the path as-is
0480:
0481: return np;
0482: }//resolvePath()
0483:
0484: // RFC2396 5.2
0485: private static URI resolve(final URI base, final URI child) {
0486: if (base.isOpaque() || child.isOpaque()) {
0487: return child;
0488: }
0489:
0490: // 5.2 (2): Reference to current document (lone fragment)
0491: if ((child.scheme == null) && (child.authority == null)
0492: && child.path.equals("") && (child.fragment != null)
0493: && (child.query == null)) {
0494: if ((base.fragment != null)
0495: && child.fragment.equals(base.fragment)) {
0496: return base;
0497: }
0498: URI ru = new URI();
0499: ru.scheme = base.scheme;
0500: ru.authority = base.authority;
0501: ru.userInfo = base.userInfo;
0502: ru.host = base.host;
0503: ru.port = base.port;
0504: ru.path = base.path;
0505: ru.fragment = child.fragment;
0506: ru.query = base.query;
0507: return ru;
0508: }
0509:
0510: // 5.2 (3): Child is absolute
0511: if (child.scheme != null) {
0512: return child;
0513: }
0514:
0515: URI ru = new URI(); // Resolved URI
0516: ru.scheme = base.scheme;
0517: ru.query = child.query;
0518: ru.fragment = child.fragment;
0519:
0520: // 5.2 (4): Authority
0521: if (child.authority == null) {
0522: ru.authority = base.authority;
0523: ru.host = base.host;
0524: ru.userInfo = base.userInfo;
0525: ru.port = base.port;
0526:
0527: String cp = (child.path == null) ? "" : child.path;
0528: if ((cp.length() > 0) && (cp.charAt(0) == '/')) {
0529: // 5.2 (5): Child path is absolute
0530: ru.path = child.path;
0531: } else {
0532: // 5.2 (6): Resolve relative path
0533: ru.path = resolvePath(base.path, cp, base.isAbsolute());
0534: }
0535: } else {
0536: ru.authority = child.authority;
0537: ru.host = child.host;
0538: ru.userInfo = child.userInfo;
0539: ru.host = child.host;
0540: ru.port = child.port;
0541: ru.path = child.path;
0542: }
0543:
0544: // 5.2 (7): Recombine (nothing to do here)
0545: return ru;
0546: }
0547:
0548: // If the given URI's path is normal then return the URI;
0549: // o.w., return a new URI containing the normalized path.
0550: private static URI normalize(final URI u) {
0551: if (u.isOpaque() || (u.path == null) || (u.path.length() == 0))
0552: return u;
0553:
0554: String np = normalize(u.path);
0555: if (np == u.path)
0556: return u;
0557:
0558: URI v = new URI();
0559: v.scheme = u.scheme;
0560: v.fragment = u.fragment;
0561: v.authority = u.authority;
0562: v.userInfo = u.userInfo;
0563: v.host = u.host;
0564: v.port = u.port;
0565: v.path = np;
0566: v.query = u.query;
0567: return v;
0568: }
0569:
0570: private static int needsNormalization(final String path) {
0571: boolean normal = true;
0572: int ns = 0; // Number of segments
0573: final int end = path.length() - 1; // Index of last char in path
0574: int p = 0; // Index of next char in path
0575:
0576: // Skip initial slashes
0577: while (p <= end) {
0578: if (path.charAt(p) != '/') {
0579: break;
0580: }
0581:
0582: p++;
0583: }
0584: if (p > 1) {
0585: normal = false;
0586: }
0587:
0588: // Scan segments
0589: while (p <= end) {
0590: if ((path.charAt(p) == '.')
0591: && ((p == end) || ((path.charAt(p + 1) == '/') || ((path
0592: .charAt(p + 1) == '.') && ((p + 1 == end) || (path
0593: .charAt(p + 2) == '/')))))) {
0594: normal = false;
0595: }
0596: ns++;
0597:
0598: while (p <= end) {
0599: if (path.charAt(p++) != '/') {
0600: continue;
0601: }
0602:
0603: while (p <= end) {
0604: if (path.charAt(p) != '/') {
0605: break;
0606: }
0607:
0608: normal = false;
0609: p++;
0610: }
0611:
0612: break;
0613: }
0614: }
0615:
0616: return normal ? -1 : ns;
0617: }//needsNormalization()
0618:
0619: private static void split(final char[] path, final int[] segs) {
0620: final int end = path.length - 1; // Index of last char in path
0621: int p = 0; // Index of next char in path
0622: int i = 0; // Index of current segment
0623:
0624: // Skip initial slashes
0625: while (p <= end) {
0626: if (path[p] != '/') {
0627: break;
0628: }
0629: path[p] = '\0';
0630: p++;
0631: }
0632:
0633: while (p <= end) {
0634: // Note start of segment
0635: segs[i++] = p++;
0636:
0637: // Find beginning of next segment
0638: while (p <= end) {
0639: if (path[p++] != '/') {
0640: continue;
0641: }
0642:
0643: path[p - 1] = '\0';
0644:
0645: // Skip redundant slashes
0646: while (p <= end) {
0647: if (path[p] != '/') {
0648: break;
0649: }
0650: path[p++] = '\0';
0651: }
0652: break;
0653: }
0654: }
0655:
0656: if (i != segs.length) {
0657: throw new InternalError(); // ASSERT
0658: }
0659: }//split()
0660:
0661: // Join the segments in the given path according to the given segment-index
0662: // array, ignoring those segments whose index entries have been set to -1,
0663: // and inserting slashes as needed. Return the length of the resulting
0664: // path.
0665: //
0666: // Preconditions:
0667: // segs[i] == -1 implies segment i is to be ignored
0668: // path computed by split, as above, with '\0' having replaced '/'
0669: //
0670: // Postconditions:
0671: // path[0] .. path[return value - 1] == Resulting path
// Rebuilds the path in place from the segments still listed in segs,
// writing '/' separators back where split left '\0', and returns the number
// of characters written (the caller uses it as the length of the new path).
// Throws InternalError if a segment starts before the current write
// position, which should not happen for input produced by split.
0672: private static int join(final char[] path, final int[] segs) {
0673: final int ns = segs.length; // Number of segments
0674: final int end = path.length - 1; // Index of last char in path
0675: int p = 0; // Index of next path char to write
0676:
0677: if (path[p] == '\0') {
0678: // Restore initial slash for absolute paths
0679: path[p++] = '/';
0680: }
0681:
0682: for (int i = 0; i < ns; i++) {
0683: int q = segs[i]; // Current segment
0684: if (q == -1) {
0685: // Ignore this segment
0686: continue;
0687: }
0688:
0689: if (p == q) {
0690: // We're already at this segment, so just skip to its end
0691: while ((p <= end) && (path[p] != '\0')) {
0692: p++;
0693: }
0694:
0695: if (p <= end) {
0696: // Preserve trailing slash
0697: path[p++] = '/';
0698:
0699: // Fix for a URL whose path embeds another URL, e.g.
0700: // "https://www.vlc.com.au/http://www.vlc.com.au/abc.html":
0701: // normalization used to yield /http:/www.vlc.com.au/abc.html as
0702: // opposed to /http://www.vlc.com.au/abc.html -- keep the "//" after the protocol
0703: if (!(p > end) && (path[p] == '\0')) {
0704: path[p++] = '/';
0705: }
0706: }
0707: } else if (p < q) {
0708: // Copy q down to p
0709: while ((q <= end) && (path[q] != '\0')) {
0710: path[p++] = path[q++];
0711: }
0712:
0713: if (q <= end) {
0714: // Preserve trailing slash
0715: path[p++] = '/';
0716: // Same fix as above for a segment that had to be copied down, e.g.
0717: // "https://www.vlc.com.au/../http://www.vlc.com.au/abc.html":
0718: // normalization used to yield ../http:/www.vlc.com.au/abc.html as
0719: // opposed to ../http://www.vlc.com.au/abc.html -- keep the "//" after the protocol
0720: if ((q + 1 <= end) && (path[q + 1] == '\0')) {
0721: path[p++] = '/';
0722: }
0723: }
0724: } else {
0725: throw new InternalError(); // ASSERT false
0726: }
0727: }
0728:
0729: return p;
0730: }//join()
0731:
0732: // Remove "." segments from the given path, and remove segment pairs
0733: // consisting of a non-".." segment followed by a ".." segment.
// Marks segments for removal by setting their entry in segs to -1; the path
// characters themselves are only read. path is expected in the form left by
// split (separators replaced by '\0'), segs holds the segment start indexes.
0734: private static void removeDots(final char[] path, final int[] segs) {
0735: final int ns = segs.length;
0736: final int end = path.length - 1;
0737:
0738: for (int i = 0; i < ns; i++) {
0739: int dots = 0; // Number of dots found (0, 1, or 2)
0740:
0741: // Find next occurrence of "." or ".."
0742: do {
0743: int p = segs[i];
0744: if (path[p] == '.') {
0745: if (p == end) {
0746: dots = 1;
0747: break;
0748: } else if (path[p + 1] == '\0') {
0749: dots = 1;
0750: break;
0751: } else if ((path[p + 1] == '.')
0752: && ((p + 1 == end) || (path[p + 2] == '\0'))) {
0753: dots = 2;
0754: break;
0755: }
0756: }
0757: i++;
0758: } while (i < ns);
0759:
// No further "." or ".." segment was found: nothing left to do
0760: if ((i > ns) || (dots == 0)) {
0761: break;
0762: }
0763:
0764: if (dots == 1) {
0765: // Remove this occurrence of "."
0766: segs[i] = -1;
0767: } else {
0768: // If there is a preceding non-".." segment, remove both that
0769: // segment and this occurrence of ".."; otherwise, leave this
0770: // ".." segment as-is.
0771: int j;
// Walk back to the nearest segment that has not been removed yet
0772: for (j = i - 1; j >= 0; j--) {
0773: if (segs[j] != -1) {
0774: break;
0775: }
0776: }
0777:
0778: if (j >= 0) {
0779: int q = segs[j];
// Cancel the pair only when segment j is not itself ".."
0780: if (!((path[q] == '.') && (path[q + 1] == '.') && (path[q + 2] == '\0'))) {
0781: segs[i] = -1;
0782: segs[j] = -1;
0783: }
0784: }
0785: }
0786: }//for loop
0787: }//removeDots()
0788:
0789: // DEVIATION: If the normalized path is relative, and if the first
0790: // segment could be parsed as a scheme name, then prepend a "." segment
0791: private static void maybeAddLeadingDot(final char[] path,
0792: final int[] segs) {
0793:
0794: if (path[0] == '\0') {
0795: // The path is absolute
0796: return;
0797: }
0798:
0799: int ns = segs.length;
0800: int f = 0; // Index of first segment
0801: while (f < ns) {
0802: if (segs[f] >= 0) {
0803: break;
0804: }
0805:
0806: f++;
0807: }
0808:
0809: if ((f >= ns) || (f == 0)) {
0810: // The path is empty, or else the original first segment survived,
0811: // in which case we already know that no leading "." is needed
0812: return;
0813: }
0814:
0815: int p = segs[f];
0816: boolean exception = false;
0817:
0818: try {
0819: while ((path[p] != ':') && (path[p] != '\0')) {
0820: p++;
0821: }
0822: } catch (Exception e) {
0823: exception = true;
0824: }
0825:
0826: if (exception || path[p] == '\0') {
0827: // No colon in first segment, so no "." needed
0828: return;
0829: }
0830:
0831: // At this point we know that the first segment is unused,
0832: // hence we can insert a "." segment at that position
0833: path[0] = '.';
0834: path[1] = '\0';
0835: segs[0] = 0;
0836: }//maybeAddLeadingDot()
0837:
0838: // Normalize the given path string. A normal path string has no empty
0839: // segments (i.e., occurrences of "//"), no segments equal to ".", and no
0840: // segments equal to ".." that are preceded by a segment not equal to "..".
0841: // In contrast to Unix-style pathname normalization, for URI paths we
0842: // always retain trailing slashes.
0843: public static String normalize(final String ps) {
0844:
0845: // Does this path need normalization?
0846: final int ns = needsNormalization(ps); // Number of segments
0847: if (ns < 0) {
0848: // Nope -- just return it
0849: return ps;
0850: }
0851:
0852: char[] path = ps.toCharArray(); // Path in char-array form
0853:
0854: // Split path into segments
0855: int[] segs = new int[ns]; // Segment-index array
0856: split(path, segs);
0857:
0858: // Remove dots
0859: removeDots(path, segs);
0860:
0861: // Prevent scheme-name confusion
0862: maybeAddLeadingDot(path, segs);
0863:
0864: // Join the remaining segments and return the result
0865: return new String(path, 0, join(path, segs));
0866: }
0867:
0868: // -- Character classes for parsing --
0869:
0870: // RFC2396 precisely specifies which characters in the US-ASCII charset are
0871: // permissible in the various components of a URI reference. We here
0872: // define a set of mask pairs to aid in enforcing these restrictions. Each
0873: // mask pair consists of two longs, a low mask and a high mask. Taken
0874: // together they represent a 128-bit mask, where bit i is set iff the
0875: // character with value i is permitted.
0876: //
0877: // This approach is more efficient than sequentially searching arrays of
0878: // permitted characters. It could be made still more efficient by
0879: // precompiling the mask information so that a character's presence in a
0880: // given mask could be determined by a single table lookup.
0881: // Compute the low-order mask for the characters in the given string
0882: private static long lowMask(final String chars) {
0883: final int n = chars.length();
0884: long m = 0;
0885: for (int i = 0; i < n; i++) {
0886: char c = chars.charAt(i);
0887: if (c < 64)
0888: m |= (1L << c);
0889: }
0890: return m;
0891: }
0892:
0893: // Compute the high-order mask for the characters in the given string
0894: private static long highMask(final String chars) {
0895: final int n = chars.length();
0896: long m = 0;
0897: for (int i = 0; i < n; i++) {
0898: char c = chars.charAt(i);
0899: if ((c >= 64) && (c < 128))
0900: m |= (1L << (c - 64));
0901: }
0902: return m;
0903: }
0904:
0905: // Compute a low-order mask for the characters
0906: // between first and last, inclusive
0907: private static long lowMask(final char first, final char last) {
0908: long m = 0;
0909: final int f = Math.max(Math.min(first, 63), 0);
0910: final int l = Math.max(Math.min(last, 63), 0);
0911: for (int i = f; i <= l; i++)
0912: m |= 1L << i;
0913: return m;
0914: }
0915:
0916: // Compute a high-order mask for the characters
0917: // between first and last, inclusive
0918: private static long highMask(final char first, final char last) {
0919: long m = 0;
0920: final int f = Math.max(Math.min(first, 127), 64) - 64;
0921: final int l = Math.max(Math.min(last, 127), 64) - 64;
0922: for (int i = f; i <= l; i++) {
0923: m |= 1L << i;
0924: }
0925: return m;
0926: }
0927:
0928: // Tell whether the given character is permitted by the given mask pair
0929: private static boolean match(final char c, final long lowMask,
0930: final long highMask) {
0931: if (c < 64)
0932: return ((1L << c) & lowMask) != 0;
0933: if (c < 128)
0934: return ((1L << (c - 64)) & highMask) != 0;
0935: return false;
0936: }
0937:
// Naming convention for the constants below: each character class is a pair
// L_xxx / H_xxx, where L_xxx holds the bits for characters 0..63 and H_xxx
// the bits for characters 64..127 (see lowMask, highMask and match).
0938: // Character-class masks, in reverse order from RFC2396 because
0939: // initializers for static fields cannot make forward references.
0940:
0941: // digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
0942: // "8" | "9"
0943: private static final long L_DIGIT = lowMask('0', '9');
0944: private static final long H_DIGIT = 0L;
0945:
0946: // upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
0947: // "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
0948: // "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
0949: private static final long L_UPALPHA = 0L;
0950: private static final long H_UPALPHA = highMask('A', 'Z');
0951:
0952: // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
0953: // "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
0954: // "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
0955: private static final long L_LOWALPHA = 0L;
0956: private static final long H_LOWALPHA = highMask('a', 'z');
0957:
0958: // alpha = lowalpha | upalpha
0959: private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
0960: private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
0961:
0962: // alphanum = alpha | digit
0963: private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
0964: private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
0965:
0966: // hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
0967: // "a" | "b" | "c" | "d" | "e" | "f"
0968: private static final long L_HEX = L_DIGIT;
0969: private static final long H_HEX = highMask('A', 'F')
0970: | highMask('a', 'f');
0971:
0972: // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
0973: // "(" | ")"
0974: private static final long L_MARK = lowMask("-_.!~*'()");
0975: private static final long H_MARK = highMask("-_.!~*'()");
0976:
0977: // unreserved = alphanum | mark
0978: private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
0979: private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
0980:
0981: // reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
0982: // "$" | "," | "[" | "]"
0983: // Added per RFC2732: "[", "]"
0984: private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
0985: private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
0986:
0987: // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
0988: // characters are allowed; this is handled by the scanEscape method below.
// quote() also tests this bit of the low mask to decide whether non-ASCII
// characters may need encoding.
0989: private static final long L_ESCAPED = 1L;
0990: private static final long H_ESCAPED = 0L;
0991:
0992: // uric = reserved | unreserved | escaped
0993: private static final long L_URIC = L_RESERVED | L_UNRESERVED
0994: | L_ESCAPED;
0995: private static final long H_URIC = H_RESERVED | H_UNRESERVED
0996: | H_ESCAPED;
0997:
0998: // pchar = unreserved | escaped |
0999: // ":" | "@" | "&" | "=" | "+" | "$" | ","
1000: private static final long L_PCHAR = L_UNRESERVED | L_ESCAPED
1001: | lowMask(":@&=+$,");
1002: private static final long H_PCHAR = H_UNRESERVED | H_ESCAPED
1003: | highMask(":@&=+$,");
1004:
1005: // All valid path characters
1006: private static final long L_PATH = L_PCHAR | lowMask(";/");
1007: private static final long H_PATH = H_PCHAR | highMask(";/");
1008:
1009: // Dash, for use in domainlabel and toplabel
1010: private static final long L_DASH = lowMask("-");
1011: private static final long H_DASH = highMask("-");
1012:
1013: // Dot, for use in hostnames
1014: private static final long L_DOT = lowMask(".");
1015: private static final long H_DOT = highMask(".");
1016:
1017: // userinfo = *( unreserved | escaped |
1018: // ";" | ":" | "&" | "=" | "+" | "$" | "," )
1019: private static final long L_USERINFO = L_UNRESERVED | L_ESCAPED
1020: | lowMask(";:&=+$,");
1021: private static final long H_USERINFO = H_UNRESERVED | H_ESCAPED
1022: | highMask(";:&=+$,");
1023:
1024: // reg_name = 1*( unreserved | escaped | "$" | "," |
1025: // ";" | ":" | "@" | "&" | "=" | "+" )
1026: private static final long L_REG_NAME = L_UNRESERVED | L_ESCAPED
1027: | lowMask("$,;:@&=+");
1028: private static final long H_REG_NAME = H_UNRESERVED | H_ESCAPED
1029: | highMask("$,;:@&=+");
1030:
1031: // All valid characters for server-based authorities
1032: private static final long L_SERVER = L_USERINFO | L_ALPHANUM
1033: | L_DASH | lowMask(".:@[]");
1034: private static final long H_SERVER = H_USERINFO | H_ALPHANUM
1035: | H_DASH | highMask(".:@[]");
1036:
1037: // scheme = alpha *( alpha | digit | "+" | "-" | "." )
1038: private static final long L_SCHEME = L_ALPHA | L_DIGIT
1039: | lowMask("+-.");
1040: private static final long H_SCHEME = H_ALPHA | H_DIGIT
1041: | highMask("+-.");
1042:
1043: private static void appendEscape(final StringBuffer sb, final byte b) {
1044: //Bug No: 4701655 - Don't escape the chars
1045: sb.append((char) b);
1046:
1047: /*sb.append( '%' );
1048: sb.append( hexDigits[( b >> 4 ) & 0x0f] );
1049: sb.append( hexDigits[( b >> 0 ) & 0x0f] );*/
1050: }//appendEscape()
1051:
1052: private static void appendEncoded(final StringBuffer sb,
1053: final char c) {
1054: /*ByteBuffer bb = null;
1055: try
1056: {
1057: bb = ThreadLocalCoders.encoderFor( "UTF-8" )
1058: .encode( CharBuffer.wrap( "" + c ) );
1059: }
1060: catch ( CharacterCodingException x )
1061: {
1062: //assert
1063: false;
1064: }
1065: while ( bb.hasRemaining() )
1066: {
1067: int b = bb.get() & 0xff;
1068: if ( b >= 0x80 )
1069: appendEscape( sb, (byte) b );
1070: else
1071: sb.append( (char) b );
1072: }*/
1073: sb.append(c);
1074: }//appendEncoded()
1075:
1076: // Quote any characters in s that are not permitted
1077: // by the given mask pair
1078: private static String quote(final String s, final long lowMask,
1079: final long highMask) {
1080: StringBuffer sb = null;
1081: final boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
1082: for (int i = 0; i < s.length(); i++) {
1083: char c = s.charAt(i);
1084: if (c < '\u0080') {
1085: if (!match(c, lowMask, highMask)) {
1086: if (sb == null) {
1087: sb = new StringBuffer();
1088: sb.append(s.substring(0, i));
1089: }
1090:
1091: appendEscape(sb, (byte) c);
1092: } else {
1093: if (sb != null) {
1094: sb.append(c);
1095: }
1096: }
1097: } else if (allowNonASCII
1098: && (Character.isSpaceChar(c) || Character
1099: .isISOControl(c))) {
1100: if (sb == null) {
1101: sb = new StringBuffer();
1102: sb.append(s.substring(0, i));
1103: }
1104:
1105: appendEncoded(sb, c);
1106: } else {
1107: if (sb != null) {
1108: sb.append(c);
1109: }
1110: }
1111: }
1112:
1113: return (sb == null) ? s : sb.toString();
1114: }
1115:
1116: // -- Parsing --
1117: // For convenience we wrap the input URI string in a new instance of the
1118: // following internal class. This saves always having to pass the input
1119: // string as an argument to each internal scan/parse method.
1120: private class Parser {
1121: private String input; // URI input string
1122: private boolean requireServerAuthority = false;
1123:
1124: Parser(final String s) {
1125: input = s;
1126: string = s;
1127: }//constructor
1128:
1129: // -- Methods for throwing URISyntaxException in various ways --
1130: private void fail(final String reason)
1131: throws URISyntaxException {
1132: throw new URISyntaxException(input, reason);
1133: }//fail()
1134:
1135: private void fail(final String reason, final int p)
1136: throws URISyntaxException {
1137: throw new URISyntaxException(input, reason, p);
1138: }//fail
1139:
1140: private void failExpecting(final String expected, final int p)
1141: throws URISyntaxException {
1142: fail("Expected " + expected, p);
1143: }//failExpecting()
1144:
1145: // -- Simple access to the input string --
1146: // Return a substring of the input string
1147: private String substring(final int start, final int end) {
1148: return input.substring(start, end);
1149: }//substring()
1150:
1151: // Return the char at position p,
1152: // assuming that p < input.length()
1153: private char charAt(final int p) {
1154: return input.charAt(p);
1155: }//charAt()
1156:
1157: // Tells whether start < end and, if so, whether charAt(start) == c
1158: private boolean at(final int start, final int end, final char c) {
1159: return (start < end) && (charAt(start) == c);
1160: }//at()
1161:
// Tells whether start + s.length() <= end and, if so,
// whether the chars at the start position match s exactly
1164: private boolean at(final int start, final int end,
1165: final String s) {
1166: int p = start;
1167: final int sn = s.length();
1168: if (sn > end - p)
1169: return false;
1170: int i = 0;
1171: while (i < sn) {
1172: if (charAt(p++) != s.charAt(i++)) {
1173: break;
1174: }
1175: }
1176:
1177: return (i == sn);
1178: }//at()
1179:
1180: // -- Scanning --
1181:
1182: // The various scan and parse methods that follow use a uniform
1183: // convention of taking the current start position and end index as
1184: // their first two arguments. The start is inclusive while the end is
1185: // exclusive, just as in the String class, i.e., a start/end pair
// denotes the half-open interval [start, end) of the input string.
1187: //
1188: // These methods never proceed past the end position. They may return
1189: // -1 to indicate outright failure, but more often they simply return
1190: // the position of the first char after the last char scanned. Thus
1191: // a typical idiom is
1192: //
1193: // int p = start;
1194: // int q = scan(p, end, ...);
1195: // if (q > p)
1196: // // We scanned something
1197: // ...;
1198: // else if (q == p)
1199: // // We scanned nothing
1200: // ...;
1201: // else if (q == -1)
1202: // // Something went wrong
1203: // ...;
1204:
1205: // Scan a specific char: If the char at the given start position is
1206: // equal to c, return the index of the next char; otherwise, return the
1207: // start position.
1208: private int scan(final int start, final int end, final char c) {
1209: if ((start < end) && (charAt(start) == c))
1210: return start + 1;
1211: return start;
1212: }
1213:
// Scan forward from the given start position. Stop at the first char
// in the err string (in which case -1 is returned), or the first char
// in the stop string (in which case the index of that char is
// returned), or the end position (in which case end is returned).
// May return the start position itself if the char at start is a stop
// char or if the interval is empty.
1220: private int scan(final int start, final int end,
1221: final String err, final String stop) {
1222: int p = start;
1223: while (p < end) {
1224: char c = charAt(p);
1225:
1226: if (err.indexOf(c) >= 0) {
1227: return -1;
1228: }
1229:
1230: if (stop.indexOf(c) >= 0) {
1231: break;
1232: }
1233: p++;
1234: }
1235: return p;
1236: }//scan()
1237:
1238: // Scan a potential escape sequence, starting at the given position,
1239: // with the given first char (i.e., charAt(start) == c).
1240: //
1241: // This method assumes that if escapes are allowed then visible
1242: // non-US-ASCII chars are also allowed.
1243: private int scanEscape(final int start, final int n,
1244: final char first) throws URISyntaxException {
1245: final int p = start;
1246: final char c = first;
1247: if (c == '%') {
1248: //by default only hex numbers are allowed..
1249: // Process escape pair
1250: if ((p + 3 <= n) && match(charAt(p + 1), L_HEX, H_HEX)
1251: && match(charAt(p + 2), L_HEX, H_HEX)) {
1252: return p + 3;
1253: }
1254: //nag fix - '&' followed by '%' is allowed
1255: else if (charAt(p + 1) == '&') {
1256: return p + 1;
1257: }
1258:
1259: fail("Malformed escape pair", p);
1260: } else if ((c > 128) && !Character.isSpaceChar(c)
1261: && !Character.isISOControl(c)) {
1262: // Allow unescaped but visible non-US-ASCII chars
1263: return p + 1;
1264: }
1265: return p;
1266: }
1267:
// Browsers such as Netscape 6.2 escape any unescaped chars in a URI
// themselves when making a request, so tolerate the invalid chars.
// Scan chars that match the given mask pair
1271: private int scan(final int start, final int n,
1272: final long lowMask, final long highMask)
1273: throws URISyntaxException {
1274: int p = start;
1275: while (p < n) {
1276: char c = charAt(p);
1277: //Bug No: 4701655 - Accept windows style URI's i.e .\abc.html
1278: if ((c == '\\') || match(c, lowMask, highMask)) {
1279: p++;
1280: continue;
1281: }
1282: if ((lowMask & L_ESCAPED) != 0) {
1283: int q = scanEscape(p, n, c);
1284: if (q > p) {
1285: p = q;
1286: continue;
1287: }
1288: }
1289: break;
1290: }
1291: return p;
1292: }//scan()
1293:
1294: // Check that each of the chars in [start, end) matches the given mask
1295: private void checkChars(final int start, final int end,
1296: final long lowMask, final long highMask,
1297: final String what) throws URISyntaxException {
1298: final int p = scan(start, end, lowMask, highMask);
1299:
1300: if (p < end) {
1301: fail("Illegal character in " + what, p);
1302: }
1303: }//checkChars()
1304:
1305: // Check that the char at position p matches the given mask
1306: private void checkChar(final int p, final long lowMask,
1307: final long highMask, final String what)
1308: throws URISyntaxException {
1309: checkChars(p, p + 1, lowMask, highMask, what);
1310: }
1311:
1312: // -- Parsing --
1313:
1314: // [<scheme>:]<scheme-specific-part>[#<fragment>]
1315: void parse(final boolean rsa) throws URISyntaxException {
1316: requireServerAuthority = rsa;
1317: final int ssp; // Start of scheme-specific part
1318: final int n = input.length();
1319: int p = scan(0, n, "/?#", ":");
1320: if ((p >= 0) && at(p, n, ':')) {
1321: if (p == 0) {
1322: failExpecting("scheme name", 0);
1323: }
1324:
1325: checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
1326: checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
1327: scheme = substring(0, p);
1328: p++; // Skip ':'
1329: ssp = p;
1330:
1331: if (at(p, n, '/')) {
1332: p = parseHierarchical(p, n);
1333: } else {
1334: int q = scan(p, n, "", "#");
1335: if (q <= p) {
1336: failExpecting("scheme-specific part", p);
1337: }
1338:
1339: checkChars(p, q, L_URIC, H_URIC, "opaque part");
1340: p = q;
1341: }
1342: } else {
1343: ssp = 0;
1344: p = parseHierarchical(0, n);
1345: }
1346:
1347: schemeSpecificPart = substring(ssp, p);
1348:
1349: if (at(p, n, '#')) {
1350: //reference check
1351: //BugNo:4762844, 4744455
1352: //Don't Check the validity of the Reference
1353: //checkChars( p + 1, n, L_URIC, H_URIC, "fragment" );
1354: fragment = substring(p + 1, n);
1355: p = n;
1356: }
1357:
1358: if (p < n) {
1359: fail("end of URI", p);
1360: }
1361: }//parse()
1362:
1363: // [//authority]<path>[?<query>]
1364: //
1365: // DEVIATION from RFC2396: We allow an empty authority component as
1366: // long as it's followed by a non-empty path, query component, or
1367: // fragment component. This is so that URIs such as "file:///foo/bar"
1368: // will parse. This seems to be the intent of RFC2396, though the
1369: // grammar does not permit it. If the authority is empty then the
1370: // userInfo, host, and port components are undefined.
1371: //
1372: // DEVIATION from RFC2396: We allow empty relative paths. This seems
1373: // to be the intent of RFC2396, but the grammar does not permit it.
1374: // The primary consequence of this deviation is that "#f" parses as a
1375: // relative URI with an empty path.
1376: private int parseHierarchical(final int start, final int n)
1377: throws URISyntaxException {
1378: int p = start;
1379: if (at(p, n, '/') && at(p + 1, n, '/')) {
1380: p += 2;
1381: int q = scan(p, n, "", "/?#");
1382: if (q > p) {
1383: p = parseAuthority(p, q);
1384: } else if (q < n) {
1385: // DEVIATION: Allow empty authority prior to non-empty
1386: // path, query component or fragment identifier
1387: } else {
1388: failExpecting("authority", p);
1389: }
1390: }
1391:
1392: int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
1393: //Nag As the browser do the encoding automatically, let us not validate the
1394: //path and query
1395: //Bug No:4744455, 4762844
1396: //checkChars( p, q, L_PATH, H_PATH, "path" );
1397: //Bug No:4744455
1398: path = substring(p, q);
1399: p = q;
1400:
1401: if (at(p, n, '?')) {
1402: p++;
1403: q = scan(p, n, "", "#");
1404: //Nag:
1405: //As the browser does the encoding automatically, let us not validate the
1406: //path and query
1407: //Bug No:4744455, 4762844
1408: //checkChars( p, q, L_URIC, H_URIC, "query" );
1409: //Bug No:4744455
1410:
1411: query = substring(p, q);
1412: p = q;
1413: }
1414: return p;
1415: }//parseHierarchical()
1416:
1417: // authority = server | reg_name
1418: //
1419: // Ambiguity: An authority that is a registry name rather than a server
1420: // might have a prefix that parses as a server. We use the fact that
1421: // the authority component is always followed by '/' or the end of the
1422: // input string to resolve this: If the complete authority did not
1423: // parse as a server then we try to parse it as a registry name.
1424: private int parseAuthority(final int start, final int n)
1425: throws URISyntaxException {
1426: final int p = start;
1427: int q = p;
1428: URISyntaxException ex = null;
1429:
1430: final boolean serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
1431: final boolean regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
1432:
1433: if (regChars && !serverChars) {
1434: // Must be a registry-based authority
1435: authority = substring(p, n);
1436: return n;
1437: }
1438:
1439: if (serverChars) {
1440: // Might be (probably is) a server-based authority, so attempt
1441: // to parse it as such. If the attempt fails, try to treat it
1442: // as a registry-based authority.
1443: try {
1444: q = parseServer(p, n);
1445: if (q < n)
1446: failExpecting("end of authority", q);
1447: authority = substring(p, n);
1448: } catch (URISyntaxException x) {
1449: // Undo results of failed parse
1450: userInfo = null;
1451: host = null;
1452: port = -1;
1453: if (requireServerAuthority) {
1454: // If we're insisting upon a server-based authority,
1455: // then just re-throw the exception
1456: throw x;
1457: } else {
1458: // Save the exception in case it doesn't parse as a
1459: // registry either
1460: ex = x;
1461: q = p;
1462: }
1463: }
1464: }
1465:
1466: if (q < n) {
1467: if (regChars) {
1468: // Registry-based authority
1469: authority = substring(p, n);
1470: } else if (ex != null) {
1471: // Re-throw exception; it was probably due to
1472: // a malformed IPv6 address
1473: throw ex;
1474: } else {
1475: fail("Illegal character in authority", q);
1476: }
1477: }
1478:
1479: return n;
1480: }//parseAuthority()
1481:
1482: // [<userinfo>@]<host>[:<port>]
1483: private int parseServer(final int start, final int n)
1484: throws URISyntaxException {
1485: int p = start;
1486: int q;
1487:
1488: // userinfo
1489: q = scan(p, n, "/?#", "@");
1490: if ((q >= p) && at(q, n, '@')) {
1491: checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
1492: userInfo = substring(p, q);
1493: p = q + 1; // Skip '@'
1494: }
1495:
1496: // hostname, IPv4 address, or IPv6 address
1497: if (at(p, n, '[')) {
1498: // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
1499: p++;
1500: q = scan(p, n, "/?#", "]");
1501: if ((q > p) && at(q, n, ']')) {
1502: parseIPv6Reference(p, q);
1503: p = q + 1;
1504: } else {
1505: failExpecting("closing bracket for IPv6 address", q);
1506: }
1507: } else {
1508: q = parseIPv4Address(p, n);
1509: if (q <= p)
1510: q = parseHostname(p, n);
1511: p = q;
1512: }
1513:
1514: // port
1515: if (at(p, n, ':')) {
1516: p++;
1517: q = scan(p, n, "", "/");
1518: if (q > p) {
1519: checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
1520: try {
1521: port = Integer.parseInt(substring(p, q));
1522: } catch (NumberFormatException x) {
1523: fail("Malformed port number", p);
1524: }
1525: p = q;
1526: }
1527: }
1528: if (p < n)
1529: failExpecting("port number", p);
1530:
1531: return p;
1532: }//parseServer()
1533:
1534: // Scan a string of decimal digits whose value fits in a byte
1535: private int scanByte(final int start, final int n)
1536: throws URISyntaxException {
1537: final int p = start;
1538: final int q = scan(p, n, L_DIGIT, H_DIGIT);
1539:
1540: if (q <= p) {
1541: return q;
1542: }
1543:
1544: if (Integer.parseInt(substring(p, q)) > 255) {
1545: return p;
1546: }
1547:
1548: return q;
1549: }//scanByte()
1550:
1551: // Scan an IPv4 address.
1552: //
1553: // If the strict argument is true then we require that the given
1554: // interval contain nothing besides an IPv4 address; if it is false
1555: // then we only require that it start with an IPv4 address.
1556: //
1557: // If the interval does not contain or start with (depending upon the
1558: // strict argument) a legal IPv4 address characters then we return -1
1559: // immediately; otherwise we insist that these characters parse as a
1560: // legal IPv4 address and throw an exception on failure.
1561: //
1562: // We assume that any string of decimal digits and dots must be an IPv4
1563: // address. It won't parse as a hostname anyway, so making that
1564: // assumption here allows more meaningful exceptions to be thrown.
1565: private int scanIPv4Address(final int start, final int n,
1566: final boolean strict) throws URISyntaxException {
1567: int p = start;
1568: int q;
1569: final int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);
1570: if ((m <= p) || (strict && (m != n))) {
1571: return -1;
1572: }
1573:
1574: for (;;) {
1575: // Per RFC2732: At most three digits per byte
1576: // Further constraint: Each element fits in a byte
1577: if ((q = scanByte(p, m)) <= p) {
1578: break;
1579: }
1580:
1581: p = q;
1582: if ((q = scan(p, m, '.')) <= p) {
1583: break;
1584: }
1585:
1586: p = q;
1587: if ((q = scanByte(p, m)) <= p) {
1588: break;
1589: }
1590:
1591: p = q;
1592: if ((q = scan(p, m, '.')) <= p) {
1593: break;
1594: }
1595:
1596: p = q;
1597: if ((q = scanByte(p, m)) <= p) {
1598: break;
1599: }
1600:
1601: p = q;
1602: if ((q = scan(p, m, '.')) <= p) {
1603: break;
1604: }
1605:
1606: p = q;
1607: if ((q = scanByte(p, m)) <= p) {
1608: break;
1609: }
1610:
1611: p = q;
1612: if (q < m) {
1613: break;
1614: }
1615:
1616: return q;
1617: }
1618:
1619: fail("Malformed IPv4 address", q);
1620: return -1;
1621: }//scanIPv4Address()
1622:
1623: // Take an IPv4 address: Throw an exception if the given interval
1624: // contains anything except an IPv4 address
1625: private int takeIPv4Address(final int start, final int n,
1626: final String expected) throws URISyntaxException {
1627: final int p = scanIPv4Address(start, n, true);
1628: if (p <= start)
1629: failExpecting(expected, start);
1630: return p;
1631: }
1632:
1633: // Attempt to parse an IPv4 address, returning -1 on failure but
1634: // allowing the given interval to contain characters after the IPv4
1635: // address (e.g., [:<port>])
1636: private int parseIPv4Address(final int start, final int n)
1637: throws URISyntaxException {
1638: final int p = scanIPv4Address(start, n, false);
1639: if (p > start)
1640: host = substring(start, p);
1641: return p;
1642: }
1643:
1644: // hostname = *( domainlabel "." ) toplabel [ "." ]
1645: // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1646: // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1647: private int parseHostname(final int start, final int n)
1648: throws URISyntaxException {
1649: int p = start;
1650: int q;
1651: int l = -1; // Start of last parsed label
1652:
1653: do {
1654: // domainlabel = alphanum [ *( alphanum | "-" ) alphanum ]
1655: q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
1656: if (q <= p)
1657: break;
1658: l = p;
1659: if (q > p) {
1660: p = q;
1661: q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM
1662: | H_DASH);
1663: if (q > p) {
1664: if (charAt(q - 1) == '-')
1665: fail("Illegal character in hostname", q - 1);
1666: p = q;
1667: }
1668: }
1669: q = scan(p, n, '.');
1670: if (q <= p)
1671: break;
1672: p = q;
1673: } while (p < n);
1674:
1675: if ((p < n) && !at(p, n, ':'))
1676: fail("Illegal character in hostname", p);
1677:
1678: if (l < 0)
1679: failExpecting("hostname", start);
1680:
1681: // Make sure last parsed label (= toplabel) starts with a letter
1682: if (!match(charAt(l), L_ALPHA, H_ALPHA))
1683: fail("Illegal character in hostname", l);
1684:
1685: host = substring(start, p);
1686: return p;
1687: }
1688:
1689: // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
1690: //
1691: // Bug: The grammar in RFC2373 Appendix B does not allow addresses of
1692: // the form ::12.34.56.78, which are clearly shown in the examples
1693: // earlier in the document. Here is the original grammar:
1694: //
1695: // IPv6address = hexpart [ ":" IPv4address ]
1696: // hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
1697: // hexseq = hex4 *( ":" hex4)
1698: // hex4 = 1*4HEXDIG
1699: //
1700: // We therefore use the following revised grammar:
1701: //
1702: // IPv6address = hexseq [ ":" IPv4address ]
1703: // | hexseq [ "::" [ hexpost ] ]
1704: // | "::" [ hexpost ]
1705: // hexpost = hexseq | hexseq ":" IPv4address | IPv4address
1706: // hexseq = hex4 *( ":" hex4)
1707: // hex4 = 1*4HEXDIG
1708: //
1709: // This covers all and only the following cases:
1710: //
1711: // hexseq
1712: // hexseq : IPv4address
1713: // hexseq ::
1714: // hexseq :: hexseq
1715: // hexseq :: hexseq : IPv4address
1716: // hexseq :: IPv4address
1717: // :: hexseq
1718: // :: hexseq : IPv4address
1719: // :: IPv4address
1720: // ::
1721: //
1722: // Finally, we also limit the length of an IPv6 address so that no more
1723: // than sixteen bytes may be specified.
1724:
// Running tally of address bytes seen while parsing an IPv6 reference:
// scanHexSeq() adds 2 per hex group and each IPv4-suffix branch adds 4.
// parseIPv6Reference() rejects the address once the tally exceeds 16.
private int ipv6byteCount = 0;
1726:
1727: private int parseIPv6Reference(final int start, final int n)
1728: throws URISyntaxException {
1729: int p = start;
1730: final int q;
1731:
1732: q = scanHexSeq(p, n);
1733: if (q > p) {
1734: p = q;
1735: if (at(p, n, "::"))
1736: p = scanHexPost(p + 2, n);
1737: else if (at(p, n, ':')) {
1738: p = takeIPv4Address(p + 1, n, "IPv4 address");
1739: ipv6byteCount += 4;
1740: }
1741: } else if (at(p, n, "::")) {
1742: p = scanHexPost(p + 2, n);
1743: }
1744: if (p < n)
1745: fail("Malformed IPv6 address", start);
1746: if (ipv6byteCount > 16)
1747: fail("IPv6 address too long", start);
1748:
1749: host = substring(start - 1, p + 1);
1750: return p;
1751: }
1752:
1753: private int scanHexPost(final int start, final int n)
1754: throws URISyntaxException {
1755: int p = start;
1756: int q;
1757:
1758: if (p == n)
1759: return p;
1760:
1761: q = scanHexSeq(p, n);
1762: if (q > p) {
1763: p = q;
1764: if (at(p, n, ':')) {
1765: p++;
1766: p = takeIPv4Address(p, n,
1767: "hex digits or IPv4 address");
1768: ipv6byteCount += 4;
1769: }
1770: } else {
1771: p = takeIPv4Address(p, n, "hex digits or IPv4 address");
1772: ipv6byteCount += 4;
1773: }
1774: return p;
1775: }
1776:
1777: // Scan a hex sequence; return -1 if one could not be scanned
1778: private int scanHexSeq(final int start, final int n)
1779: throws URISyntaxException {
1780: int p = start;
1781: int q;
1782:
1783: q = scan(p, n, L_HEX, H_HEX);
1784: if (q <= p) {
1785: return -1;
1786: }
1787:
1788: if (at(q, n, '.')) // Beginning of IPv4 address
1789: {
1790: return -1;
1791: }
1792:
1793: ipv6byteCount += 2;
1794: p = q;
1795: while (p < n) {
1796: if (!at(p, n, ':')) {
1797: break;
1798: }
1799:
1800: if (at(p + 1, n, ':')) {
1801: break; // "::"
1802: }
1803:
1804: p++;
1805: q = scan(p, n, L_HEX, H_HEX);
1806: if (q <= p) {
1807: failExpecting("digits for an IPv6 address", p);
1808: }
1809:
1810: if (at(q, n, '.')) { // Beginning of IPv4 address
1811: p--;
1812: break;
1813: }
1814:
1815: if (q > p + 4) {
1816: fail("IPv6 hexadecimal digit sequence too long", p);
1817: }
1818:
1819: ipv6byteCount += 2;
1820: p = q;
1821: }
1822:
1823: return p;
1824: }
1825: }//class Parser
1826:
1827: public static void main(String[] args) throws Exception {
1828: String uri = "#";
1829:
1830: //"http://rajanagendra.India.Sun.COM/ips/desktop?action=content&provider=ipsdtPopupContainer&last=false&leafChannel=ipsdtSampleRSS&fontFace1=Sans-serif&size=100%&containerName=ipsdtTableContainer3&action=content&provider=ipsdtTabContainer&provider_cmds=%3CA+HREF%3D%22http%3A%2F%2Frajanagendra.India.Sun.COM%2Fips-static%2Fdocs%2Fen%2Fdesktop%2Frsschann.htm%22+target%3D%22wthelp%22+onClick%3D%22javascript%3A+var+helpWin%3Dwindow.open%28%27http%3A%2F%2Frajanagendra.India.Sun.COM%2Fips-static%2Fdocs%2Fen%2Fdesktop%2Frsschann.htm%27%2C+%27wthelp%27%2C+%27width%3D600%2Cheight%3D500%2Chotkeys%3Dno%2Cstatus%3Dno%2Cresizable%3Dyes%2Cscrollbars%3Dyes%2Ctoolbar%3Dyes%27%29%3B+helpWin.focus%28%29%3Breturn+false%3B%22%3E%3CIMG+SRC%3D%27%2Fips-static%2Fdesktop%2Fimages%2Fb_help.gif%27+ALT%3D%27Help%27+BORDER%3D0%3E%3C%2FA%3E%3CA+HREF%3D%22javascript%3Avoid%280%29%22+onClick%3D%22openUrlInParent%28%27desktop%3Faction%3Dprocess%26provider%3DipsdtTableContainer3%26ipsdtTableContainer3.channelAction%3Dattach%26ipsdtTableContainer3.targetProvider%3DipsdtSampleRSS%27%29%3B+window.close%28%29%22%3E%3CIMG+SRC%3D%22%2Fips-static%2Fdesktop%2Fimages%2Fb_attach.gif%22+ALT%3D%22Attach++ipsdtSampleRSS%22+BORDER%3D0%3E%3C%2FA%3E%3CA+HREF%3D%22javascript%3Avoid%280%29%22+onClick%3D%22openUrlInParent%28%27desktop%3Faction%3Dprocess%26provider%3DipsdtTableContainer3%26ipsdtTableContainer3.channelAction%3Dremove%26ipsdtTableContainer3.targetProvider%3DipsdtSampleRSS%27%29%3B+window.close%28%29%22%3E%3CIMG+SRC%3D%22%2Fips-static%2Fdesktop%2Fimages%2Fb_remove.gif%22+ALT%3D%22Remove++ipsdtSampleRSS%22+BORDER%3D0%3E%3C%2FA%3E";
1831:
1832: System.out.println(new URI(uri));
1833: //System.out.println( normalize( "null" ) );
1834: }//main()
1835: }//class URI
|