001: /*
002: * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/util/URIUtil.java,v 1.27 2004/05/05 20:34:01 olegk Exp $
003: * $Revision: 507321 $
004: * $Date: 2007-02-14 01:10:51 +0100 (Wed, 14 Feb 2007) $
005: *
006: * ====================================================================
007: *
008: * Licensed to the Apache Software Foundation (ASF) under one or more
009: * contributor license agreements. See the NOTICE file distributed with
010: * this work for additional information regarding copyright ownership.
011: * The ASF licenses this file to You under the Apache License, Version 2.0
012: * (the "License"); you may not use this file except in compliance with
013: * the License. You may obtain a copy of the License at
014: *
015: * http://www.apache.org/licenses/LICENSE-2.0
016: *
017: * Unless required by applicable law or agreed to in writing, software
018: * distributed under the License is distributed on an "AS IS" BASIS,
019: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
020: * See the License for the specific language governing permissions and
021: * limitations under the License.
022: * ====================================================================
023: *
024: * This software consists of voluntary contributions made by many
025: * individuals on behalf of the Apache Software Foundation. For more
026: * information on the Apache Software Foundation, please see
027: * <http://www.apache.org/>.
028: *
029: */
030:
031: package org.apache.commons.httpclient.util;
032:
033: import java.util.BitSet;
034:
035: import org.apache.commons.codec.DecoderException;
036: import org.apache.commons.codec.net.URLCodec;
037: import org.apache.commons.httpclient.URI;
038: import org.apache.commons.httpclient.URIException;
039:
040: /**
041: * The URI escape and character encoding and decoding utility.
042: * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather
043: * than {@link org.apache.commons.httpclient.URI}.
044: *
045: * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
046: * @version $Revision: 507321 $ $Date: 2002/03/14 15:14:01
047: */
048: public class URIUtil {
049:
// -------------------------------------------------------- Class variables

/**
 * A bit set in which no bit is set. It is passed as the set of allowed
 * (not to be escaped) characters by <code>encodeAll(String, String)</code>.
 */
protected static final BitSet empty = new BitSet(1);
053:
054: // ---------------------------------------------------------- URI utilities
055:
056: /**
057: * Get the basename of an URI. It's possibly an empty string.
058: *
059: * @param uri a string regarded an URI
060: * @return the basename string; an empty string if the path ends with slash
061: */
062: public static String getName(String uri) {
063: if (uri == null || uri.length() == 0) {
064: return uri;
065: }
066: String path = URIUtil.getPath(uri);
067: int at = path.lastIndexOf("/");
068: int to = path.length();
069: return (at >= 0) ? path.substring(at + 1, to) : path;
070: }
071:
072: /**
073: * Get the query of an URI.
074: *
075: * @param uri a string regarded an URI
076: * @return the query string; <code>null</code> if empty or undefined
077: */
078: public static String getQuery(String uri) {
079: if (uri == null || uri.length() == 0) {
080: return null;
081: }
082: // consider of net_path
083: int at = uri.indexOf("//");
084: int from = uri.indexOf("/", at >= 0 ? (uri.lastIndexOf("/",
085: at - 1) >= 0 ? 0 : at + 2) : 0);
086: // the authority part of URI ignored
087: int to = uri.length();
088: // reuse the at and from variables to consider the query
089: at = uri.indexOf("?", from);
090: if (at >= 0) {
091: from = at + 1;
092: } else {
093: return null;
094: }
095: // check the fragment
096: if (uri.lastIndexOf("#") > from) {
097: to = uri.lastIndexOf("#");
098: }
099: // get the path and query.
100: return (from < 0 || from == to) ? null : uri
101: .substring(from, to);
102: }
103:
104: /**
105: * Get the path of an URI.
106: *
107: * @param uri a string regarded an URI
108: * @return the path string
109: */
110: public static String getPath(String uri) {
111: if (uri == null) {
112: return null;
113: }
114: // consider of net_path
115: int at = uri.indexOf("//");
116: int from = uri.indexOf("/", at >= 0 ? (uri.lastIndexOf("/",
117: at - 1) >= 0 ? 0 : at + 2) : 0);
118: // the authority part of URI ignored
119: int to = uri.length();
120: // check the query
121: if (uri.indexOf('?', from) != -1) {
122: to = uri.indexOf('?', from);
123: }
124: // check the fragment
125: if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
126: to = uri.lastIndexOf("#");
127: }
128: // get only the path.
129: return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from,
130: to);
131: }
132:
133: /**
134: * Get the path and query of an URI.
135: *
136: * @param uri a string regarded an URI
137: * @return the path and query string
138: */
139: public static String getPathQuery(String uri) {
140: if (uri == null) {
141: return null;
142: }
143: // consider of net_path
144: int at = uri.indexOf("//");
145: int from = uri.indexOf("/", at >= 0 ? (uri.lastIndexOf("/",
146: at - 1) >= 0 ? 0 : at + 2) : 0);
147: // the authority part of URI ignored
148: int to = uri.length();
149: // Ignore the '?' mark so to ignore the query.
150: // check the fragment
151: if (uri.lastIndexOf("#") > from) {
152: to = uri.lastIndexOf("#");
153: }
154: // get the path and query.
155: return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from,
156: to);
157: }
158:
159: /**
160: * Get the path of an URI and its rest part.
161: *
162: * @param uri a string regarded an URI
163: * @return the string from the path part
164: */
165: public static String getFromPath(String uri) {
166: if (uri == null) {
167: return null;
168: }
169: // consider of net_path
170: int at = uri.indexOf("//");
171: int from = uri.indexOf("/", at >= 0 ? (uri.lastIndexOf("/",
172: at - 1) >= 0 ? 0 : at + 2) : 0);
173: // get the path and its rest.
174: return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
175: }
176:
177: // ----------------------------------------------------- Encoding utilities
178:
179: /**
180: * Get the all escaped and encoded string with the default protocl charset.
181: * It's the same function to use <code>encode(String unescaped, Bitset
182: * empty, URI.getDefaultProtocolCharset())</code>.
183: *
184: * @param unescaped an unescaped string
185: * @return the escaped string
186: *
187: * @throws URIException if the default protocol charset is not supported
188: *
189: * @see URI#getDefaultProtocolCharset
190: * @see #encode
191: */
192: public static String encodeAll(String unescaped)
193: throws URIException {
194: return encodeAll(unescaped, URI.getDefaultProtocolCharset());
195: }
196:
197: /**
198: * Get the all escaped and encoded string with a given charset.
199: * It's the same function to use <code>encode(String unescaped, Bitset
200: * empty, String charset)</code>.
201: *
202: * @param unescaped an unescaped string
203: * @param charset the charset
204: * @return the escaped string
205: *
206: * @throws URIException if the charset is not supported
207: *
208: * @see #encode
209: */
210: public static String encodeAll(String unescaped, String charset)
211: throws URIException {
212:
213: return encode(unescaped, empty, charset);
214: }
215:
216: /**
217: * Escape and encode a string regarded as within the authority component of
218: * an URI with the default protocol charset.
219: * Within the authority component, the characters ";", ":", "@", "?", and
220: * "/" are reserved.
221: *
222: * @param unescaped an unescaped string
223: * @return the escaped string
224: *
225: * @throws URIException if the default protocol charset is not supported
226: *
227: * @see URI#getDefaultProtocolCharset
228: * @see #encode
229: */
230: public static String encodeWithinAuthority(String unescaped)
231: throws URIException {
232:
233: return encodeWithinAuthority(unescaped, URI
234: .getDefaultProtocolCharset());
235: }
236:
237: /**
238: * Escape and encode a string regarded as within the authority component of
239: * an URI with a given charset.
240: * Within the authority component, the characters ";", ":", "@", "?", and
241: * "/" are reserved.
242: *
243: * @param unescaped an unescaped string
244: * @param charset the charset
245: * @return the escaped string
246: *
247: * @throws URIException if the charset is not supported
248: *
249: * @see #encode
250: */
251: public static String encodeWithinAuthority(String unescaped,
252: String charset) throws URIException {
253:
254: return encode(unescaped, URI.allowed_within_authority, charset);
255: }
256:
257: /**
258: * Escape and encode a string regarded as the path and query components of
259: * an URI with the default protocol charset.
260: *
261: * @param unescaped an unescaped string
262: * @return the escaped string
263: *
264: * @throws URIException if the default protocol charset is not supported
265: *
266: * @see URI#getDefaultProtocolCharset
267: * @see #encode
268: */
269: public static String encodePathQuery(String unescaped)
270: throws URIException {
271: return encodePathQuery(unescaped, URI
272: .getDefaultProtocolCharset());
273: }
274:
275: /**
276: * Escape and encode a string regarded as the path and query components of
277: * an URI with a given charset.
278: *
279: * @param unescaped an unescaped string
280: * @param charset the charset
281: * @return the escaped string
282: *
283: * @throws URIException if the charset is not supported
284: *
285: * @see #encode
286: */
287: public static String encodePathQuery(String unescaped,
288: String charset) throws URIException {
289:
290: int at = unescaped.indexOf('?');
291: if (at < 0) {
292: return encode(unescaped, URI.allowed_abs_path, charset);
293: }
294: // else
295: return encode(unescaped.substring(0, at), URI.allowed_abs_path,
296: charset)
297: + '?'
298: + encode(unescaped.substring(at + 1),
299: URI.allowed_query, charset);
300: }
301:
302: /**
303: * Escape and encode a string regarded as within the path component of an
304: * URI with the default protocol charset.
305: * The path may consist of a sequence of path segments separated by a
306: * single slash "/" character. Within a path segment, the characters
307: * "/", ";", "=", and "?" are reserved.
308: *
309: * @param unescaped an unescaped string
310: * @return the escaped string
311: *
312: * @throws URIException if the default protocol charset is not supported
313: *
314: * @see URI#getDefaultProtocolCharset
315: * @see #encode
316: */
317: public static String encodeWithinPath(String unescaped)
318: throws URIException {
319:
320: return encodeWithinPath(unescaped, URI
321: .getDefaultProtocolCharset());
322: }
323:
324: /**
325: * Escape and encode a string regarded as within the path component of an
326: * URI with a given charset.
327: * The path may consist of a sequence of path segments separated by a
328: * single slash "/" character. Within a path segment, the characters
329: * "/", ";", "=", and "?" are reserved.
330: *
331: * @param unescaped an unescaped string
332: * @param charset the charset
333: * @return the escaped string
334: *
335: * @throws URIException if the charset is not supported
336: *
337: * @see #encode
338: */
339: public static String encodeWithinPath(String unescaped,
340: String charset) throws URIException {
341:
342: return encode(unescaped, URI.allowed_within_path, charset);
343: }
344:
345: /**
346: * Escape and encode a string regarded as the path component of an URI with
347: * the default protocol charset.
348: *
349: * @param unescaped an unescaped string
350: * @return the escaped string
351: *
352: * @throws URIException if the default protocol charset is not supported
353: *
354: * @see URI#getDefaultProtocolCharset
355: * @see #encode
356: */
357: public static String encodePath(String unescaped)
358: throws URIException {
359: return encodePath(unescaped, URI.getDefaultProtocolCharset());
360: }
361:
362: /**
363: * Escape and encode a string regarded as the path component of an URI with
364: * a given charset.
365: *
366: * @param unescaped an unescaped string
367: * @param charset the charset
368: * @return the escaped string
369: *
370: * @throws URIException if the charset is not supported
371: *
372: * @see #encode
373: */
374: public static String encodePath(String unescaped, String charset)
375: throws URIException {
376:
377: return encode(unescaped, URI.allowed_abs_path, charset);
378: }
379:
380: /**
381: * Escape and encode a string regarded as within the query component of an
382: * URI with the default protocol charset.
383: * When a query comprise the name and value pairs, it is used in order
384: * to encode each name and value string. The reserved special characters
385: * within a query component are being included in encoding the query.
386: *
387: * @param unescaped an unescaped string
388: * @return the escaped string
389: *
390: * @throws URIException if the default protocol charset is not supported
391: *
392: * @see URI#getDefaultProtocolCharset
393: * @see #encode
394: */
395: public static String encodeWithinQuery(String unescaped)
396: throws URIException {
397:
398: return encodeWithinQuery(unescaped, URI
399: .getDefaultProtocolCharset());
400: }
401:
402: /**
403: * Escape and encode a string regarded as within the query component of an
404: * URI with a given charset.
405: * When a query comprise the name and value pairs, it is used in order
406: * to encode each name and value string. The reserved special characters
407: * within a query component are being included in encoding the query.
408: *
409: * @param unescaped an unescaped string
410: * @param charset the charset
411: * @return the escaped string
412: *
413: * @throws URIException if the charset is not supported
414: *
415: * @see #encode
416: */
417: public static String encodeWithinQuery(String unescaped,
418: String charset) throws URIException {
419:
420: return encode(unescaped, URI.allowed_within_query, charset);
421: }
422:
423: /**
424: * Escape and encode a string regarded as the query component of an URI with
425: * the default protocol charset.
426: * When a query string is not misunderstood the reserved special characters
427: * ("&", "=", "+", ",", and "$") within a query component, this method
428: * is recommended to use in encoding the whole query.
429: *
430: * @param unescaped an unescaped string
431: * @return the escaped string
432: *
433: * @throws URIException if the default protocol charset is not supported
434: *
435: * @see URI#getDefaultProtocolCharset
436: * @see #encode
437: */
438: public static String encodeQuery(String unescaped)
439: throws URIException {
440: return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
441: }
442:
443: /**
444: * Escape and encode a string regarded as the query component of an URI with
445: * a given charset.
446: * When a query string is not misunderstood the reserved special characters
447: * ("&", "=", "+", ",", and "$") within a query component, this method
448: * is recommended to use in encoding the whole query.
449: *
450: * @param unescaped an unescaped string
451: * @param charset the charset
452: * @return the escaped string
453: *
454: * @throws URIException if the charset is not supported
455: *
456: * @see #encode
457: */
458: public static String encodeQuery(String unescaped, String charset)
459: throws URIException {
460:
461: return encode(unescaped, URI.allowed_query, charset);
462: }
463:
464: /**
465: * Escape and encode a given string with allowed characters not to be
466: * escaped and the default protocol charset.
467: *
468: * @param unescaped a string
469: * @param allowed allowed characters not to be escaped
470: * @return the escaped string
471: *
472: * @throws URIException if the default protocol charset is not supported
473: *
474: * @see URI#getDefaultProtocolCharset
475: */
476: public static String encode(String unescaped, BitSet allowed)
477: throws URIException {
478:
479: return encode(unescaped, allowed, URI
480: .getDefaultProtocolCharset());
481: }
482:
483: /**
484: * Escape and encode a given string with allowed characters not to be
485: * escaped and a given charset.
486: *
487: * @param unescaped a string
488: * @param allowed allowed characters not to be escaped
489: * @param charset the charset
490: * @return the escaped string
491: */
492: public static String encode(String unescaped, BitSet allowed,
493: String charset) throws URIException {
494: byte[] rawdata = URLCodec.encodeUrl(allowed, EncodingUtil
495: .getBytes(unescaped, charset));
496: return EncodingUtil.getAsciiString(rawdata);
497: }
498:
499: /**
500: * Unescape and decode a given string regarded as an escaped string with the
501: * default protocol charset.
502: *
503: * @param escaped a string
504: * @return the unescaped string
505: *
506: * @throws URIException if the string cannot be decoded (invalid)
507: *
508: * @see URI#getDefaultProtocolCharset
509: */
510: public static String decode(String escaped) throws URIException {
511: try {
512: byte[] rawdata = URLCodec.decodeUrl(EncodingUtil
513: .getAsciiBytes(escaped));
514: return EncodingUtil.getString(rawdata, URI
515: .getDefaultProtocolCharset());
516: } catch (DecoderException e) {
517: throw new URIException(e.getMessage());
518: }
519: }
520:
521: /**
522: * Unescape and decode a given string regarded as an escaped string.
523: *
524: * @param escaped a string
525: * @param charset the charset
526: * @return the unescaped string
527: *
528: * @throws URIException if the charset is not supported
529: *
530: * @see Coder#decode
531: */
532: public static String decode(String escaped, String charset)
533: throws URIException {
534:
535: return Coder.decode(escaped.toCharArray(), charset);
536: }
537:
538: // ---------------------------------------------------------- Inner classes
539:
540: /**
541: * The basic and internal utility for URI escape and character encoding and
542: * decoding.
543: *
544: * @deprecated use org.apache.commons.codec.net.URLCodec
545: */
546: protected static class Coder extends URI {
547:
548: /**
549: * Escape and encode a given string with allowed characters not to be
550: * escaped.
551: *
552: * @param unescapedComponent an unescaped component
553: * @param allowed allowed characters not to be escaped
554: * @param charset the charset to encode
555: * @return the escaped and encoded string
556: *
557: * @throws URIException if the charset is not supported
558: *
559: * @deprecated use org.apache.commons.codec.net.URLCodec
560: */
561: public static char[] encode(String unescapedComponent,
562: BitSet allowed, String charset) throws URIException {
563:
564: return URI.encode(unescapedComponent, allowed, charset);
565: }
566:
567: /**
568: * Unescape and decode a given string.
569: *
570: * @param escapedComponent an being-unescaped component
571: * @param charset the charset to decode
572: * @return the escaped and encoded string
573: *
574: * @throws URIException if the charset is not supported
575: *
576: * @deprecated use org.apache.commons.codec.net.URLCodec
577: */
578: public static String decode(char[] escapedComponent,
579: String charset) throws URIException {
580:
581: return URI.decode(escapedComponent, charset);
582: }
583:
584: /**
585: * Verify whether a given string is escaped or not
586: *
587: * @param original given characters
588: * @return true if the given character array is 7 bit ASCII-compatible.
589: */
590: public static boolean verifyEscaped(char[] original) {
591: for (int i = 0; i < original.length; i++) {
592: int c = original[i];
593: if (c > 128) {
594: return false;
595: } else if (c == '%') {
596: if (Character.digit(original[++i], 16) == -1
597: || Character.digit(original[++i], 16) == -1) {
598: return false;
599: }
600: }
601: }
602: return true;
603: }
604:
605: /**
606: * Replace from a given character to given character in an array order
607: * for a given string.
608: *
609: * @param original a given string
610: * @param from a replacing character array
611: * @param to a replaced character array
612: * @return the replaced string
613: */
614: public static String replace(String original, char[] from,
615: char[] to) {
616: for (int i = from.length; i > 0; --i) {
617: original = replace(original, from[i], to[i]);
618: }
619: return original;
620: }
621:
622: /**
623: * Replace from a given character to given character for a given string.
624: *
625: * @param original a given string
626: * @param from a replacing character array
627: * @param to a replaced character array
628: * @return the replaced string
629: */
630: public static String replace(String original, char from, char to) {
631: StringBuffer result = new StringBuffer(original.length());
632: int at, saved = 0;
633: do {
634: at = original.indexOf(from);
635: if (at >= 0) {
636: result.append(original.substring(0, at));
637: result.append(to);
638: } else {
639: result.append(original.substring(saved));
640: }
641: saved = at;
642: } while (at >= 0);
643: return result.toString();
644: }
645: }
646:
647: }
|