001: /*
002: * Copyright 2001-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.commons.codec.net;
018:
019: import java.io.ByteArrayOutputStream;
020: import java.io.UnsupportedEncodingException;
021: import java.util.BitSet;
022:
023: import org.apache.commons.codec.BinaryDecoder;
024: import org.apache.commons.codec.BinaryEncoder;
025: import org.apache.commons.codec.DecoderException;
026: import org.apache.commons.codec.EncoderException;
027: import org.apache.commons.codec.StringDecoder;
028: import org.apache.commons.codec.StringEncoder;
029:
030: /**
031: * <p>Implements the 'www-form-urlencoded' encoding scheme,
032: * also misleadingly known as URL encoding.</p>
033: *
034: * <p>For more detailed information please refer to
035: * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
036: * Chapter 17.13.4 'Form content types'</a> of the
* <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a></p>
038: *
039: * <p>
040: * This codec is meant to be a replacement for standard Java classes
041: * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
042: * on older Java platforms, as these classes in Java versions below
043: * 1.4 rely on the platform's default charset encoding.
044: * </p>
045: *
046: * @author Apache Software Foundation
047: * @since 1.2
048: * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
049: */
050: public class URLCodec implements BinaryEncoder, BinaryDecoder,
051: StringEncoder, StringDecoder {
052:
053: /**
054: * The default charset used for string decoding and encoding.
055: */
056: protected String charset = StringEncodings.UTF8;
057:
058: protected static byte ESCAPE_CHAR = '%';
059: /**
060: * BitSet of www-form-url safe characters.
061: */
062: protected static final BitSet WWW_FORM_URL = new BitSet(256);
063:
064: // Static initializer for www_form_url
065: static {
066: // alpha characters
067: for (int i = 'a'; i <= 'z'; i++) {
068: WWW_FORM_URL.set(i);
069: }
070: for (int i = 'A'; i <= 'Z'; i++) {
071: WWW_FORM_URL.set(i);
072: }
073: // numeric characters
074: for (int i = '0'; i <= '9'; i++) {
075: WWW_FORM_URL.set(i);
076: }
077: // special chars
078: WWW_FORM_URL.set('-');
079: WWW_FORM_URL.set('_');
080: WWW_FORM_URL.set('.');
081: WWW_FORM_URL.set('*');
082: // blank to be replaced with +
083: WWW_FORM_URL.set(' ');
084: }
085:
086: /**
087: * Default constructor.
088: */
089: public URLCodec() {
090: super ();
091: }
092:
093: /**
094: * Constructor which allows for the selection of a default charset
095: *
096: * @param charset the default string charset to use.
097: */
098: public URLCodec(String charset) {
099: super ();
100: this .charset = charset;
101: }
102:
103: /**
104: * Encodes an array of bytes into an array of URL safe 7-bit
105: * characters. Unsafe characters are escaped.
106: *
107: * @param urlsafe bitset of characters deemed URL safe
108: * @param bytes array of bytes to convert to URL safe characters
109: * @return array of bytes containing URL safe characters
110: */
111: public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
112: if (bytes == null) {
113: return null;
114: }
115: if (urlsafe == null) {
116: urlsafe = WWW_FORM_URL;
117: }
118:
119: ByteArrayOutputStream buffer = new ByteArrayOutputStream();
120: for (int i = 0; i < bytes.length; i++) {
121: int b = bytes[i];
122: if (b < 0) {
123: b = 256 + b;
124: }
125: if (urlsafe.get(b)) {
126: if (b == ' ') {
127: b = '+';
128: }
129: buffer.write(b);
130: } else {
131: buffer.write('%');
132: char hex1 = Character.toUpperCase(Character.forDigit(
133: (b >> 4) & 0xF, 16));
134: char hex2 = Character.toUpperCase(Character.forDigit(
135: b & 0xF, 16));
136: buffer.write(hex1);
137: buffer.write(hex2);
138: }
139: }
140: return buffer.toByteArray();
141: }
142:
143: /**
144: * Decodes an array of URL safe 7-bit characters into an array of
145: * original bytes. Escaped characters are converted back to their
146: * original representation.
147: *
148: * @param bytes array of URL safe characters
149: * @return array of original bytes
150: * @throws DecoderException Thrown if URL decoding is unsuccessful
151: */
152: public static final byte[] decodeUrl(byte[] bytes)
153: throws DecoderException {
154: if (bytes == null) {
155: return null;
156: }
157: ByteArrayOutputStream buffer = new ByteArrayOutputStream();
158: for (int i = 0; i < bytes.length; i++) {
159: int b = bytes[i];
160: if (b == '+') {
161: buffer.write(' ');
162: } else if (b == '%') {
163: try {
164: int u = Character.digit((char) bytes[++i], 16);
165: int l = Character.digit((char) bytes[++i], 16);
166: if (u == -1 || l == -1) {
167: throw new DecoderException(
168: "Invalid URL encoding");
169: }
170: buffer.write((char) ((u << 4) + l));
171: } catch (ArrayIndexOutOfBoundsException e) {
172: throw new DecoderException("Invalid URL encoding");
173: }
174: } else {
175: buffer.write(b);
176: }
177: }
178: return buffer.toByteArray();
179: }
180:
181: /**
182: * Encodes an array of bytes into an array of URL safe 7-bit
183: * characters. Unsafe characters are escaped.
184: *
185: * @param bytes array of bytes to convert to URL safe characters
186: * @return array of bytes containing URL safe characters
187: */
188: public byte[] encode(byte[] bytes) {
189: return encodeUrl(WWW_FORM_URL, bytes);
190: }
191:
192: /**
193: * Decodes an array of URL safe 7-bit characters into an array of
194: * original bytes. Escaped characters are converted back to their
195: * original representation.
196: *
197: * @param bytes array of URL safe characters
198: * @return array of original bytes
199: * @throws DecoderException Thrown if URL decoding is unsuccessful
200: */
201: public byte[] decode(byte[] bytes) throws DecoderException {
202: return decodeUrl(bytes);
203: }
204:
205: /**
206: * Encodes a string into its URL safe form using the specified
207: * string charset. Unsafe characters are escaped.
208: *
209: * @param pString string to convert to a URL safe form
210: * @param charset the charset for pString
211: * @return URL safe string
212: * @throws UnsupportedEncodingException Thrown if charset is not
213: * supported
214: */
215: public String encode(String pString, String charset)
216: throws UnsupportedEncodingException {
217: if (pString == null) {
218: return null;
219: }
220: return new String(encode(pString.getBytes(charset)),
221: StringEncodings.US_ASCII);
222: }
223:
224: /**
225: * Encodes a string into its URL safe form using the default string
226: * charset. Unsafe characters are escaped.
227: *
228: * @param pString string to convert to a URL safe form
229: * @return URL safe string
230: * @throws EncoderException Thrown if URL encoding is unsuccessful
231: *
232: * @see #getDefaultCharset()
233: */
234: public String encode(String pString) throws EncoderException {
235: if (pString == null) {
236: return null;
237: }
238: try {
239: return encode(pString, getDefaultCharset());
240: } catch (UnsupportedEncodingException e) {
241: throw new EncoderException(e.getMessage());
242: }
243: }
244:
245: /**
246: * Decodes a URL safe string into its original form using the
247: * specified encoding. Escaped characters are converted back
248: * to their original representation.
249: *
250: * @param pString URL safe string to convert into its original form
251: * @param charset the original string charset
252: * @return original string
253: * @throws DecoderException Thrown if URL decoding is unsuccessful
254: * @throws UnsupportedEncodingException Thrown if charset is not
255: * supported
256: */
257: public String decode(String pString, String charset)
258: throws DecoderException, UnsupportedEncodingException {
259: if (pString == null) {
260: return null;
261: }
262: return new String(decode(pString
263: .getBytes(StringEncodings.US_ASCII)), charset);
264: }
265:
266: /**
267: * Decodes a URL safe string into its original form using the default
268: * string charset. Escaped characters are converted back to their
269: * original representation.
270: *
271: * @param pString URL safe string to convert into its original form
272: * @return original string
273: * @throws DecoderException Thrown if URL decoding is unsuccessful
274: *
275: * @see #getDefaultCharset()
276: */
277: public String decode(String pString) throws DecoderException {
278: if (pString == null) {
279: return null;
280: }
281: try {
282: return decode(pString, getDefaultCharset());
283: } catch (UnsupportedEncodingException e) {
284: throw new DecoderException(e.getMessage());
285: }
286: }
287:
288: /**
289: * Encodes an object into its URL safe form. Unsafe characters are
290: * escaped.
291: *
292: * @param pObject string to convert to a URL safe form
293: * @return URL safe object
294: * @throws EncoderException Thrown if URL encoding is not
295: * applicable to objects of this type or
296: * if encoding is unsuccessful
297: */
298: public Object encode(Object pObject) throws EncoderException {
299: if (pObject == null) {
300: return null;
301: } else if (pObject instanceof byte[]) {
302: return encode((byte[]) pObject);
303: } else if (pObject instanceof String) {
304: return encode((String) pObject);
305: } else {
306: throw new EncoderException("Objects of type "
307: + pObject.getClass().getName()
308: + " cannot be URL encoded");
309:
310: }
311: }
312:
313: /**
314: * Decodes a URL safe object into its original form. Escaped
315: * characters are converted back to their original representation.
316: *
317: * @param pObject URL safe object to convert into its original form
318: * @return original object
319: * @throws DecoderException Thrown if URL decoding is not
320: * applicable to objects of this type
321: * if decoding is unsuccessful
322: */
323: public Object decode(Object pObject) throws DecoderException {
324: if (pObject == null) {
325: return null;
326: } else if (pObject instanceof byte[]) {
327: return decode((byte[]) pObject);
328: } else if (pObject instanceof String) {
329: return decode((String) pObject);
330: } else {
331: throw new DecoderException("Objects of type "
332: + pObject.getClass().getName()
333: + " cannot be URL decoded");
334:
335: }
336: }
337:
338: /**
339: * The <code>String</code> encoding used for decoding and encoding.
340: *
341: * @return Returns the encoding.
342: *
343: * @deprecated use #getDefaultCharset()
344: */
345: public String getEncoding() {
346: return this .charset;
347: }
348:
349: /**
350: * The default charset used for string decoding and encoding.
351: *
352: * @return the default string charset.
353: */
354: public String getDefaultCharset() {
355: return this.charset;
356: }
357:
358: }
|