001 /*
002 * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004 *
005 * This code is free software; you can redistribute it and/or modify it
006 * under the terms of the GNU General Public License version 2 only, as
007 * published by the Free Software Foundation. Sun designates this
008 * particular file as subject to the "Classpath" exception as provided
009 * by Sun in the LICENSE file that accompanied this code.
010 *
011 * This code is distributed in the hope that it will be useful, but WITHOUT
012 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014 * version 2 for more details (a copy is included in the LICENSE file that
015 * accompanied this code).
016 *
017 * You should have received a copy of the GNU General Public License version
018 * 2 along with this work; if not, write to the Free Software Foundation,
019 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020 *
021 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022 * CA 95054 USA or visit www.sun.com if you need additional information or
023 * have any questions.
024 */
025
026 package java.net;
027
028 import java.io.*;
029
030 /**
031 * Utility class for HTML form decoding. This class contains static methods
032 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
033 * MIME format.
034 * <p>
035 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
036 * that all characters in the encoded string are one of the following:
037 * "<code>a</code>" through "<code>z</code>",
038 * "<code>A</code>" through "<code>Z</code>",
039 * "<code>0</code>" through "<code>9</code>", and
040 * "<code>-</code>", "<code>_</code>",
041 * "<code>.</code>", and "<code>*</code>". The
042 * character "<code>%</code>" is allowed but is interpreted
043 * as the start of a special escaped sequence.
044 * <p>
045 * The following rules are applied in the conversion:
046 * <p>
047 * <ul>
048 * <li>The alphanumeric characters "<code>a</code>" through
049 * "<code>z</code>", "<code>A</code>" through
050 * "<code>Z</code>" and "<code>0</code>"
051 * through "<code>9</code>" remain the same.
052 * <li>The special characters "<code>.</code>",
053 * "<code>-</code>", "<code>*</code>", and
054 * "<code>_</code>" remain the same.
055 * <li>The plus sign "<code>+</code>" is converted into a
056 * space character "<code> </code>" .
057 * <li>A sequence of the form "<code>%<i>xy</i></code>" will be
058 * treated as representing a byte where <i>xy</i> is the two-digit
059 * hexadecimal representation of the 8 bits. Then, all substrings
060 * that contain one or more of these byte sequences consecutively
061 * will be replaced by the character(s) whose encoding would result
062 * in those consecutive bytes.
063 * The encoding scheme used to decode these characters may be specified,
064 * or if unspecified, the default encoding of the platform will be used.
065 * </ul>
066 * <p>
067 * There are two possible ways in which this decoder could deal with
068 * illegal strings. It could either leave illegal characters alone or
069 * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
070 * Which approach the decoder takes is left to the
071 * implementation.
072 *
073 * @author Mark Chamness
074 * @author Michael McCloskey
075 * @version 1.36, 05/05/07
076 * @since 1.2
077 */
078
079 public class URLDecoder {
080
081 // The platform default encoding
082 static String dfltEncName = URLEncoder.dfltEncName;
083
084 /**
085 * Decodes a <code>x-www-form-urlencoded</code> string.
086 * The platform's default encoding is used to determine what characters
087 * are represented by any consecutive sequences of the form
088 * "<code>%<i>xy</i></code>".
089 * @param s the <code>String</code> to decode
090 * @deprecated The resulting string may vary depending on the platform's
091 * default encoding. Instead, use the decode(String,String) method
092 * to specify the encoding.
093 * @return the newly decoded <code>String</code>
094 */
095 @Deprecated
096 public static String decode(String s) {
097
098 String str = null;
099
100 try {
101 str = decode(s, dfltEncName);
102 } catch (UnsupportedEncodingException e) {
103 // The system should always have the platform default
104 }
105
106 return str;
107 }
108
109 /**
110 * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
111 * encoding scheme.
112 * The supplied encoding is used to determine
113 * what characters are represented by any consecutive sequences of the
114 * form "<code>%<i>xy</i></code>".
115 * <p>
116 * <em><strong>Note:</strong> The <a href=
117 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
118 * World Wide Web Consortium Recommendation</a> states that
119 * UTF-8 should be used. Not doing so may introduce
120 * incompatibilites.</em>
121 *
122 * @param s the <code>String</code> to decode
123 * @param enc The name of a supported
124 * <a href="../lang/package-summary.html#charenc">character
125 * encoding</a>.
126 * @return the newly decoded <code>String</code>
127 * @exception UnsupportedEncodingException
128 * If character encoding needs to be consulted, but
129 * named character encoding is not supported
130 * @see URLEncoder#encode(java.lang.String, java.lang.String)
131 * @since 1.4
132 */
133 public static String decode(String s, String enc)
134 throws UnsupportedEncodingException {
135
136 boolean needToChange = false;
137 int numChars = s.length();
138 StringBuffer sb = new StringBuffer(
139 numChars > 500 ? numChars / 2 : numChars);
140 int i = 0;
141
142 if (enc.length() == 0) {
143 throw new UnsupportedEncodingException(
144 "URLDecoder: empty string enc parameter");
145 }
146
147 char c;
148 byte[] bytes = null;
149 while (i < numChars) {
150 c = s.charAt(i);
151 switch (c) {
152 case '+':
153 sb.append(' ');
154 i++;
155 needToChange = true;
156 break;
157 case '%':
158 /*
159 * Starting with this instance of %, process all
160 * consecutive substrings of the form %xy. Each
161 * substring %xy will yield a byte. Convert all
162 * consecutive bytes obtained this way to whatever
163 * character(s) they represent in the provided
164 * encoding.
165 */
166
167 try {
168
169 // (numChars-i)/3 is an upper bound for the number
170 // of remaining bytes
171 if (bytes == null)
172 bytes = new byte[(numChars - i) / 3];
173 int pos = 0;
174
175 while (((i + 2) < numChars) && (c == '%')) {
176 int v = Integer.parseInt(s.substring(i + 1,
177 i + 3), 16);
178 if (v < 0)
179 throw new IllegalArgumentException(
180 "URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
181 bytes[pos++] = (byte) v;
182 i += 3;
183 if (i < numChars)
184 c = s.charAt(i);
185 }
186
187 // A trailing, incomplete byte encoding such as
188 // "%x" will cause an exception to be thrown
189
190 if ((i < numChars) && (c == '%'))
191 throw new IllegalArgumentException(
192 "URLDecoder: Incomplete trailing escape (%) pattern");
193
194 sb.append(new String(bytes, 0, pos, enc));
195 } catch (NumberFormatException e) {
196 throw new IllegalArgumentException(
197 "URLDecoder: Illegal hex characters in escape (%) pattern - "
198 + e.getMessage());
199 }
200 needToChange = true;
201 break;
202 default:
203 sb.append(c);
204 i++;
205 break;
206 }
207 }
208
209 return (needToChange ? sb.toString() : s);
210 }
211 }
|