001: // ========================================================================
002: // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
003: // ------------------------------------------------------------------------
004: // Licensed under the Apache License, Version 2.0 (the "License");
005: // you may not use this file except in compliance with the License.
006: // You may obtain a copy of the License at
007: // http://www.apache.org/licenses/LICENSE-2.0
008: // Unless required by applicable law or agreed to in writing, software
009: // distributed under the License is distributed on an "AS IS" BASIS,
010: // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011: // See the License for the specific language governing permissions and
012: // limitations under the License.
013: // ========================================================================
014:
015: package org.mortbay.util;
016:
017: import java.io.UnsupportedEncodingException;
018:
019: /* ------------------------------------------------------------ */
020: /** URI Holder.
021: * This class assists with the decoding and encoding or HTTP URI's.
022: * It differs from the java.net.URL class as it does not provide
023: * communications ability, but it does assist with query string
024: * formatting.
025: * <P>UTF-8 encoding is used by default for % encoded characters. This
026: * may be overridden with the org.mortbay.util.URI.charset system property.
027: * @see UrlEncoded
028: * @author Greg Wilkins (gregw)
029: */
030: public class URIUtil implements Cloneable {
031: public static final String HTTP = "http";
032: public static final String HTTP_COLON = "http:";
033: public static final String HTTPS = "https";
034: public static final String HTTPS_COLON = "https:";
035:
036: // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
037: public static final String __CHARSET = System.getProperty(
038: "org.mortbay.util.URI.charset", StringUtil.__UTF8);
039:
040: private URIUtil() {
041: }
042:
043: /* ------------------------------------------------------------ */
044: /** Encode a URI path.
045: * This is the same encoding offered by URLEncoder, except that
046: * the '/' character is not encoded.
047: * @param path The path the encode
048: * @return The encoded path
049: */
050: public static String encodePath(String path) {
051: if (path == null || path.length() == 0)
052: return path;
053:
054: StringBuffer buf = encodePath(null, path);
055: return buf == null ? path : buf.toString();
056: }
057:
058: /* ------------------------------------------------------------ */
059: /** Encode a URI path.
060: * @param path The path the encode
061: * @param buf StringBuffer to encode path into (or null)
062: * @return The StringBuffer or null if no substitutions required.
063: */
064: public static StringBuffer encodePath(StringBuffer buf, String path) {
065: if (buf == null) {
066: loop: for (int i = 0; i < path.length(); i++) {
067: char c = path.charAt(i);
068: switch (c) {
069: case '%':
070: case '?':
071: case ';':
072: case '#':
073: case ' ':
074: buf = new StringBuffer(path.length() << 1);
075: break loop;
076: }
077: }
078: if (buf == null)
079: return null;
080: }
081:
082: synchronized (buf) {
083: for (int i = 0; i < path.length(); i++) {
084: char c = path.charAt(i);
085: switch (c) {
086: case '%':
087: buf.append("%25");
088: continue;
089: case '?':
090: buf.append("%3F");
091: continue;
092: case ';':
093: buf.append("%3B");
094: continue;
095: case '#':
096: buf.append("%23");
097: continue;
098: case ' ':
099: buf.append("%20");
100: continue;
101: default:
102: buf.append(c);
103: continue;
104: }
105: }
106: }
107:
108: return buf;
109: }
110:
111: /* ------------------------------------------------------------ */
112: /** Encode a URI path.
113: * @param path The path the encode
114: * @param buf StringBuffer to encode path into (or null)
115: * @param encode String of characters to encode. % is always encoded.
116: * @return The StringBuffer or null if no substitutions required.
117: */
118: public static StringBuffer encodeString(StringBuffer buf,
119: String path, String encode) {
120: if (buf == null) {
121: loop: for (int i = 0; i < path.length(); i++) {
122: char c = path.charAt(i);
123: if (c == '%' || encode.indexOf(c) >= 0) {
124: buf = new StringBuffer(path.length() << 1);
125: break loop;
126: }
127: }
128: if (buf == null)
129: return null;
130: }
131:
132: synchronized (buf) {
133: for (int i = 0; i < path.length(); i++) {
134: char c = path.charAt(i);
135: if (c == '%' || encode.indexOf(c) >= 0) {
136: buf.append('%');
137: StringUtil.append(buf, (byte) (0xff & c), 16);
138: } else
139: buf.append(c);
140: }
141: }
142:
143: return buf;
144: }
145:
146: /* ------------------------------------------------------------ */
147: /* Decode a URI path.
148: * @param path The path the encode
149: * @param buf StringBuffer to encode path into
150: */
151: public static String decodePath(String path) {
152: if (path == null)
153: return null;
154: char[] chars = null;
155: int n = 0;
156: byte[] bytes = null;
157: int b = 0;
158:
159: int len = path.length();
160:
161: for (int i = 0; i < len; i++) {
162: char c = path.charAt(i);
163:
164: if (c == '%' && (i + 2) < len) {
165: if (chars == null) {
166: chars = new char[len];
167: bytes = new byte[len];
168: path.getChars(0, i, chars, 0);
169: }
170: bytes[b++] = (byte) (0xff & TypeUtil.parseInt(path,
171: i + 1, 2, 16));
172: i += 2;
173: continue;
174: } else if (bytes == null) {
175: n++;
176: continue;
177: }
178:
179: if (b > 0) {
180: String s;
181: try {
182: s = new String(bytes, 0, b, __CHARSET);
183: } catch (UnsupportedEncodingException e) {
184: s = new String(bytes, 0, b);
185: }
186: s.getChars(0, s.length(), chars, n);
187: n += s.length();
188: b = 0;
189: }
190:
191: chars[n++] = c;
192: }
193:
194: if (chars == null)
195: return path;
196:
197: return new String(chars, 0, n);
198: }
199:
200: /* ------------------------------------------------------------ */
201: /* Decode a URI path.
202: * @param path The path the encode
203: * @param buf StringBuffer to encode path into
204: */
205: public static String decodePath(byte[] buf, int offset, int length) {
206: byte[] bytes = null;
207: int n = 0;
208:
209: for (int i = 0; i < length; i++) {
210: byte b = buf[i + offset];
211:
212: if (b == '%' && (i + 2) < length) {
213: b = (byte) (0xff & TypeUtil.parseInt(buf, i + offset
214: + 1, 2, 16));
215: i += 2;
216: } else if (bytes == null) {
217: n++;
218: continue;
219: }
220:
221: if (bytes == null) {
222: bytes = new byte[length];
223: for (int j = 0; j < n; j++)
224: bytes[j] = buf[j + offset];
225: }
226:
227: bytes[n++] = b;
228: }
229:
230: try {
231: if (bytes == null)
232: return new String(buf, offset, length, __CHARSET);
233: return new String(bytes, 0, n, __CHARSET);
234: } catch (UnsupportedEncodingException e) {
235: if (bytes == null)
236: return new String(buf, offset, length);
237: return new String(bytes, 0, n);
238: }
239: }
240:
241: /* ------------------------------------------------------------ */
242: /** Add two URI path segments.
243: * Handles null and empty paths, path and query params (eg ?a=b or
244: * ;JSESSIONID=xxx) and avoids duplicate '/'
245: * @param p1 URI path segment
246: * @param p2 URI path segment
247: * @return Legally combined path segments.
248: */
249: public static String addPaths(String p1, String p2) {
250: if (p1 == null || p1.length() == 0) {
251: if (p1 != null && p2 == null)
252: return p1;
253: return p2;
254: }
255: if (p2 == null || p2.length() == 0)
256: return p1;
257:
258: int split = p1.indexOf(';');
259: if (split < 0)
260: split = p1.indexOf('?');
261: if (split == 0)
262: return p2 + p1;
263: if (split < 0)
264: split = p1.length();
265:
266: StringBuffer buf = new StringBuffer(p1.length() + p2.length()
267: + 2);
268: buf.append(p1);
269:
270: if (buf.charAt(split - 1) == '/') {
271: if (p2.startsWith("/")) {
272: buf.deleteCharAt(split - 1);
273: buf.insert(split - 1, p2);
274: } else
275: buf.insert(split, p2);
276: } else {
277: if (p2.startsWith("/"))
278: buf.insert(split, p2);
279: else {
280: buf.insert(split, '/');
281: buf.insert(split + 1, p2);
282: }
283: }
284:
285: return buf.toString();
286: }
287:
288: /* ------------------------------------------------------------ */
289: /** Return the parent Path.
290: * Treat a URI like a directory path and return the parent directory.
291: */
292: public static String parentPath(String p) {
293: if (p == null || "/".equals(p))
294: return null;
295: int slash = p.lastIndexOf('/', p.length() - 2);
296: if (slash >= 0)
297: return p.substring(0, slash + 1);
298: return null;
299: }
300:
301: /* ------------------------------------------------------------ */
302: /** Strip parameters from a path.
303: * Return path upto any semicolon parameters.
304: */
305: public static String stripPath(String path) {
306: if (path == null)
307: return null;
308: int semi = path.indexOf(';');
309: if (semi < 0)
310: return path;
311: return path.substring(0, semi);
312: }
313:
314: /* ------------------------------------------------------------ */
315: /** Convert a path to a cananonical form.
316: * All instances of "." and ".." are factored out. Null is returned
317: * if the path tries to .. above its root.
318: * @param path
319: * @return path or null.
320: */
321: public static String canonicalPath(String path) {
322: if (path == null || path.length() == 0)
323: return path;
324:
325: int end = path.length();
326: int queryIdx = path.indexOf('?');
327: int start = path.lastIndexOf('/', (queryIdx > 0 ? queryIdx
328: : end));
329:
330: search: while (end > 0) {
331: switch (end - start) {
332: case 2: // possible single dot
333: if (path.charAt(start + 1) != '.')
334: break;
335: break search;
336: case 3: // possible double dot
337: if (path.charAt(start + 1) != '.'
338: || path.charAt(start + 2) != '.')
339: break;
340: break search;
341: }
342:
343: end = start;
344: start = path.lastIndexOf('/', end - 1);
345: }
346:
347: // If we have checked the entire string
348: if (start >= end)
349: return path;
350:
351: StringBuffer buf = new StringBuffer(path);
352: int delStart = -1;
353: int delEnd = -1;
354: int skip = 0;
355:
356: while (end > 0) {
357: switch (end - start) {
358: case 2: // possible single dot
359: if (buf.charAt(start + 1) != '.') {
360: if (skip > 0 && --skip == 0) {
361: delStart = start >= 0 ? start : 0;
362: if (delStart > 0 && delEnd == buf.length()
363: && buf.charAt(delEnd - 1) == '.')
364: delStart++;
365: }
366: break;
367: }
368:
369: if (start < 0 && buf.length() > 2
370: && buf.charAt(1) == '/' && buf.charAt(2) == '/')
371: break;
372:
373: if (delEnd < 0)
374: delEnd = end;
375: delStart = start;
376: if (delStart < 0 || delStart == 0
377: && buf.charAt(delStart) == '/') {
378: delStart++;
379: if (delEnd < buf.length()
380: && buf.charAt(delEnd) == '/')
381: delEnd++;
382: break;
383: }
384: if (end == buf.length())
385: delStart++;
386:
387: end = start--;
388: while (start >= 0 && buf.charAt(start) != '/')
389: start--;
390: continue;
391:
392: case 3: // possible double dot
393: if (buf.charAt(start + 1) != '.'
394: || buf.charAt(start + 2) != '.') {
395: if (skip > 0 && --skip == 0) {
396: delStart = start >= 0 ? start : 0;
397: if (delStart > 0 && delEnd == buf.length()
398: && buf.charAt(delEnd - 1) == '.')
399: delStart++;
400: }
401: break;
402: }
403:
404: delStart = start;
405: if (delEnd < 0)
406: delEnd = end;
407:
408: skip++;
409: end = start--;
410: while (start >= 0 && buf.charAt(start) != '/')
411: start--;
412: continue;
413:
414: default:
415: if (skip > 0 && --skip == 0) {
416: delStart = start >= 0 ? start : 0;
417: if (delEnd == buf.length()
418: && buf.charAt(delEnd - 1) == '.')
419: delStart++;
420: }
421: }
422:
423: // Do the delete
424: if (skip <= 0 && delStart >= 0 && delStart >= 0) {
425: buf.delete(delStart, delEnd);
426: delStart = delEnd = -1;
427: if (skip > 0)
428: delEnd = end;
429: }
430:
431: end = start--;
432: while (start >= 0 && buf.charAt(start) != '/')
433: start--;
434: }
435:
436: // Too many ..
437: if (skip > 0)
438: return null;
439:
440: // Do the delete
441: if (delEnd >= 0)
442: buf.delete(delStart, delEnd);
443:
444: return buf.toString();
445: }
446:
447: /* ------------------------------------------------------------ */
448: /**
449: * @param uri URI
450: * @return True if the uri has a scheme
451: */
452: public static boolean hasScheme(String uri) {
453: for (int i = 0; i < uri.length(); i++) {
454: char c = uri.charAt(i);
455: if (c == ':')
456: return true;
457: if (!(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || (i > 0 && (c >= '0'
458: && c <= '9' || c == '.' || c == '+' || c == '-'))))
459: break;
460: }
461: return false;
462: }
463:
464: }
|