001: /*
002: * @(#)ParseUtil.java 1.15 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package sun.net.www;
029:
030: import java.util.BitSet;
031: import java.io.UnsupportedEncodingException;
032: import java.io.File;
033: import java.net.URL;
034: import java.net.MalformedURLException;
035:
036: /**
037: * A class that contains useful routines common to sun.net.www
038: * @author Mike McCloskey
039: */
040:
public class ParseUtil {
    /**
     * Bit i is set when the US-ASCII character with code i must be
     * percent-escaped by {@link #encodePath(String)}.
     */
    static BitSet encodedInPath;

    static {
        encodedInPath = new BitSet(256);

        // Characters reserved in the path segment (RFC2396 section 3.3).
        String reserved = "=;?/";
        for (int i = 0; i < reserved.length(); i++) {
            encodedInPath.set(reserved.charAt(i));
        }

        // Characters defined as excluded in RFC2396 section 2.4.3; they
        // must be escaped if they occur in the data part of a URI.
        String excluded = "# <>%\"{}|\\^[]`";
        for (int i = 0; i < excluded.length(); i++) {
            encodedInPath.set(excluded.charAt(i));
        }

        // US-ASCII control characters 00-1F and 7F.
        for (int i = 0; i < 32; i++) {
            encodedInPath.set(i);
        }
        encodedInPath.set(127);
    }

    /**
     * Constructs an encoded version of the specified path string suitable
     * for use in the construction of a URL.
     *
     * Every occurrence of <code>File.separatorChar</code> is replaced by a
     * forward slash. Characters up to 0x7F are copied unchanged unless they
     * are flagged in <code>encodedInPath</code>, in which case they are
     * %-escaped. Characters above 0x7F are written as the %-escaped two- or
     * three-byte UTF-8 form of the individual <code>char</code>; the two
     * halves of a surrogate pair are therefore encoded separately.
     * Hex digits in escapes are lowercase.
     *
     * @param path the path to encode; must not be null
     * @return the encoded path
     */
    public static String encodePath(String path) {
        StringBuffer sb = new StringBuffer();
        int n = path.length();
        for (int i = 0; i < n; i++) {
            char c = path.charAt(i);
            if (c == File.separatorChar) {
                sb.append('/');
            } else if (c <= 0x007F) {
                if (encodedInPath.get(c)) {
                    escape(sb, c);
                } else {
                    sb.append(c);
                }
            } else if (c > 0x07FF) {
                // Three-byte UTF-8 form: 1110xxxx 10xxxxxx 10xxxxxx
                escape(sb, (char) (0xE0 | ((c >> 12) & 0x0F)));
                escape(sb, (char) (0x80 | ((c >> 6) & 0x3F)));
                escape(sb, (char) (0x80 | (c & 0x3F)));
            } else {
                // Two-byte UTF-8 form: 110xxxxx 10xxxxxx
                escape(sb, (char) (0xC0 | ((c >> 6) & 0x1F)));
                escape(sb, (char) (0x80 | (c & 0x3F)));
            }
        }
        return sb.toString();
    }

    /**
     * Appends the URL escape sequence ('%' followed by two lowercase hex
     * digits) for the low eight bits of the specified char to the
     * specified StringBuffer.
     */
    private static void escape(StringBuffer s, char c) {
        s.append('%');
        s.append(Character.forDigit((c >> 4) & 0xF, 16));
        s.append(Character.forDigit(c & 0xF, 16));
    }

    /**
     * Un-escapes and returns the byte value of the three-character escape
     * sequence "%XY" that starts at position i in string s.
     *
     * @throws IllegalArgumentException if fewer than three characters
     *         remain at position i, if the character at i is not '%', or
     *         if either of the next two characters is not a hex digit
     */
    private static char unescape(String s, int i) {
        if (i + 2 >= s.length()) {
            throw new IllegalArgumentException(
                "Incomplete escape sequence at index " + i + ": " + s);
        }
        if (s.charAt(i) != '%') {
            throw new IllegalArgumentException(
                "Expected '%' at index " + i + ": " + s);
        }
        // Character.digit (unlike Integer.parseInt) never accepts a sign.
        int hi = Character.digit(s.charAt(i + 1), 16);
        int lo = Character.digit(s.charAt(i + 2), 16);
        if (hi < 0 || lo < 0) {
            throw new IllegalArgumentException(
                "Malformed escape sequence at index " + i + ": " + s);
        }
        return (char) ((hi << 4) | lo);
    }

    /**
     * Un-escapes the sequence at position i in s, checks that it is a UTF-8
     * continuation byte (10xxxxxx) and returns its six payload bits.
     *
     * @throws IllegalArgumentException if the escape is malformed or the
     *         byte is not a continuation byte
     */
    private static int continuation(String s, int i) {
        char b = unescape(s, i);
        if ((b & 0xC0) != 0x80) {
            throw new IllegalArgumentException(
                "Invalid UTF-8 continuation byte at index " + i + ": " + s);
        }
        return b & 0x3F;
    }

    /**
     * Returns a new String constructed from the specified String by replacing
     * the URL escape sequences and UTF8 encoding with the characters they
     * represent.
     *
     * Characters other than '%' are copied unchanged. One-, two- and
     * three-byte UTF-8 sequences are recognised; overlong forms are not
     * rejected.
     *
     * @param s the string to decode; must not be null
     * @return the decoded string
     * @throws IllegalArgumentException if an escape sequence is truncated or
     *         contains non-hex characters, if a multi-byte sequence is
     *         missing a continuation byte or has an invalid one, or if the
     *         lead byte is a bare continuation byte (0x80-0xBF) or starts a
     *         sequence longer than three bytes (0xF0-0xFF)
     */
    public static String decode(String s) {
        StringBuffer sb = new StringBuffer();
        int n = s.length();

        int i = 0;
        while (i < n) {
            char c = s.charAt(i);
            if (c != '%') {
                sb.append(c);
                i++;
                continue;
            }

            int value = unescape(s, i);
            i += 3;

            if ((value & 0x80) != 0) {
                switch (value >> 4) {
                case 0xC:
                case 0xD:
                    // 110xxxxx 10xxxxxx
                    value = ((value & 0x1F) << 6) | continuation(s, i);
                    i += 3;
                    break;

                case 0xE: {
                    // 1110xxxx 10xxxxxx 10xxxxxx
                    int mid = continuation(s, i);
                    int low = continuation(s, i + 3);
                    value = ((value & 0x0F) << 12) | (mid << 6) | low;
                    i += 6;
                    break;
                }

                default:
                    throw new IllegalArgumentException(
                        "Unsupported UTF-8 lead byte before index " + i
                        + ": " + s);
                }
            }

            sb.append((char) value);
        }

        return sb.toString();
    }

    /**
     * Returns a canonical version of the specified string.
     *
     * Embedded "/../" segments are removed together with the segment that
     * precedes them, embedded "/./" segments are collapsed, a trailing
     * "/.." is removed together with the segment that precedes it (leaving
     * a trailing slash when a parent remains), and a trailing "/." is
     * reduced to "/".
     *
     * @param file the path to canonicalize; must not be null
     * @return the canonicalized path
     */
    public String canonizeString(String file) {
        int i;
        int lim;

        // Remove embedded /../
        while ((i = file.indexOf("/../")) >= 0) {
            if ((lim = file.lastIndexOf('/', i - 1)) >= 0) {
                file = file.substring(0, lim) + file.substring(i + 3);
            } else {
                file = file.substring(i + 3);
            }
        }
        // Remove embedded /./
        while ((i = file.indexOf("/./")) >= 0) {
            file = file.substring(0, i) + file.substring(i + 2);
        }
        // Remove trailing ..
        while (file.endsWith("/..")) {
            // Anchor on the trailing "/.." itself. Searching from the start
            // would hit an earlier segment that merely begins with ".."
            // (e.g. "/..foo") and truncate the path far too early.
            i = file.length() - 3;
            if ((lim = file.lastIndexOf('/', i - 1)) >= 0) {
                file = file.substring(0, lim + 1);
            } else {
                file = file.substring(0, i);
            }
        }
        // Remove trailing .
        if (file.endsWith("/.")) {
            file = file.substring(0, file.length() - 1);
        }

        return file;
    }

    /**
     * Builds a "file" URL with an empty host from the absolute path of the
     * given file.
     *
     * The path is passed through {@link #encodePath(String)}, prefixed with
     * '/' if it does not already start with one, and suffixed with '/' if
     * the file is a directory and the path does not already end with one.
     *
     * @param file the file to convert; must not be null
     * @return the resulting URL
     * @throws MalformedURLException if the URL constructor rejects the
     *         arguments
     */
    public static URL fileToEncodedURL(File file)
        throws MalformedURLException {
        String path = file.getAbsolutePath();
        path = ParseUtil.encodePath(path);
        if (!path.startsWith("/")) {
            path = "/" + path;
        }
        if (!path.endsWith("/") && file.isDirectory()) {
            path = path + "/";
        }
        return new URL("file", "", path);
    }

}
|