001: /*
002: * @(#)URLDecoder.java 1.16 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package java.net;
029:
030: import java.io.*;
031:
032: /**
033: * Utility class for HTML form decoding. This class contains static methods
034: * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
035: * MIME format.
036: * <p>
037: * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
038: * that all characters in the encoded string are one of the following:
039: * "<code>a</code>" through "<code>z</code>",
040: * "<code>A</code>" through "<code>Z</code>",
041: * "<code>0</code>" through "<code>9</code>", and
042: * "<code>-</code>", "<code>_</code>",
043: * "<code>.</code>", and "<code>*</code>". The
044: * character "<code>%</code>" is allowed but is interpreted
045: * as the start of a special escaped sequence.
046: * <p>
047: * The following rules are applied in the conversion:
048: * <p>
049: * <ul>
050: * <li>The alphanumeric characters "<code>a</code>" through
051: * "<code>z</code>", "<code>A</code>" through
052: * "<code>Z</code>" and "<code>0</code>"
053: * through "<code>9</code>" remain the same.
054: * <li>The special characters "<code>.</code>",
055: * "<code>-</code>", "<code>*</code>", and
056: * "<code>_</code>" remain the same.
057: * <li>The plus sign "<code>+</code>" is converted into a
058: * space character "<code> </code>" .
059: * <li>A sequence of the form "<code>%<i>xy</i></code>" will be
060: * treated as representing a byte where <i>xy</i> is the two-digit
061: * hexadecimal representation of the 8 bits. Then, all substrings
062: * that contain one or more of these byte sequences consecutively
063: * will be replaced by the character(s) whose encoding would result
064: * in those consecutive bytes.
065: * The encoding scheme used to decode these characters may be specified,
066: * or if unspecified, the default encoding of the platform will be used.
067: * </ul>
068: * <p>
069: * There are two possible ways in which this decoder could deal with
070: * illegal strings. It could either leave illegal characters alone or
071: * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
072: * Which approach the decoder takes is left to the
073: * implementation.
074: *
075: * @author Mark Chamness
076: * @author Michael McCloskey
077: * @version 1.9, 02/02/00
078: * @since 1.2
079: */
080:
081: public class URLDecoder {
082:
083: // The platform default encoding
084: static String dfltEncName = URLEncoder.dfltEncName;
085:
086: /**
087: * Decodes a <code>x-www-form-urlencoded</code> string.
088: * The platform's default encoding is used to determine what characters
089: * are represented by any consecutive sequences of the form
090: * "<code>%<i>xy</i></code>".
091: * @param s the <code>String</code> to decode
092: * @deprecated The resulting string may vary depending on the platform's
093: * default encoding. Instead, use the decode(String,String) method
094: * to specify the encoding.
095: * @return the newly decoded <code>String</code>
096: */
097: public static String decode(String s) {
098:
099: String str = null;
100:
101: try {
102: str = decode(s, dfltEncName);
103: } catch (UnsupportedEncodingException e) {
104: // The system should always have the platform default
105: }
106:
107: return str;
108: }
109:
110: /**
111: * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
112: * encoding scheme.
113: * The supplied encoding is used to determine
114: * what characters are represented by any consecutive sequences of the
115: * form "<code>%<i>xy</i></code>".
116: * <p>
117: * <em><strong>Note:</strong> The <a href=
118: * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
119: * World Wide Web Consortium Recommendation</a> states that
120: * UTF-8 should be used. Not doing so may introduce
121: * incompatibilites.</em>
122: *
123: * @param s the <code>String</code> to decode
124: * @param enc The name of a supported
125: * <a href="../lang/package-summary.html#charenc">character
126: * encoding</a>.
127: * @return the newly decoded <code>String</code>
128: * @exception UnsupportedEncodingException
129: * If the named encoding is not supported
130: * @see URLEncoder#encode(java.lang.String, java.lang.String)
131: * @since 1.4
132: */
133: public static String decode(String s, String enc)
134: throws UnsupportedEncodingException {
135:
136: boolean needToChange = false;
137: StringBuffer sb = new StringBuffer();
138: int numChars = s.length();
139: int i = 0;
140:
141: if (enc.length() == 0) {
142: throw new UnsupportedEncodingException(
143: "URLDecoder: empty string enc parameter");
144: }
145:
146: while (i < numChars) {
147: char c = s.charAt(i);
148: switch (c) {
149: case '+':
150: sb.append(' ');
151: i++;
152: needToChange = true;
153: break;
154: case '%':
155: /*
156: * Starting with this instance of %, process all
157: * consecutive substrings of the form %xy. Each
158: * substring %xy will yield a byte. Convert all
159: * consecutive bytes obtained this way to whatever
160: * character(s) they represent in the provided
161: * encoding.
162: */
163:
164: try {
165:
166: // (numChars-i)/3 is an upper bound for the number
167: // of remaining bytes
168: byte[] bytes = new byte[(numChars - i) / 3];
169: int pos = 0;
170:
171: while (((i + 2) < numChars) && (c == '%')) {
172: bytes[pos++] = (byte) Integer.parseInt(s
173: .substring(i + 1, i + 3), 16);
174: i += 3;
175: if (i < numChars)
176: c = s.charAt(i);
177: }
178:
179: // A trailing, incomplete byte encoding such as
180: // "%x" will cause an exception to be thrown
181:
182: if ((i < numChars) && (c == '%'))
183: throw new IllegalArgumentException(
184: "URLDecoder: Incomplete trailing escape (%) pattern");
185:
186: sb.append(new String(bytes, 0, pos, enc));
187: } catch (NumberFormatException e) {
188: throw new IllegalArgumentException(
189: "URLDecoder: Illegal hex characters in escape (%) pattern - "
190: + e.getMessage());
191: }
192: needToChange = true;
193: break;
194: default:
195: sb.append(c);
196: i++;
197: break;
198: }
199: }
200:
201: return (needToChange ? sb.toString() : s);
202: }
203: }
|