001: /*
002: * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/util/EncodingUtil.java,v 1.8 2004/05/13 04:01:22 mbecke Exp $
003: * $Revision: 480424 $
004: * $Date: 2006-11-29 06:56:49 +0100 (Wed, 29 Nov 2006) $
005: *
006: * ====================================================================
007: *
008: * Licensed to the Apache Software Foundation (ASF) under one or more
009: * contributor license agreements. See the NOTICE file distributed with
010: * this work for additional information regarding copyright ownership.
011: * The ASF licenses this file to You under the Apache License, Version 2.0
012: * (the "License"); you may not use this file except in compliance with
013: * the License. You may obtain a copy of the License at
014: *
015: * http://www.apache.org/licenses/LICENSE-2.0
016: *
017: * Unless required by applicable law or agreed to in writing, software
018: * distributed under the License is distributed on an "AS IS" BASIS,
019: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
020: * See the License for the specific language governing permissions and
021: * limitations under the License.
022: * ====================================================================
023: *
024: * This software consists of voluntary contributions made by many
025: * individuals on behalf of the Apache Software Foundation. For more
026: * information on the Apache Software Foundation, please see
027: * <http://www.apache.org/>.
028: *
029: */
030: package org.apache.commons.httpclient.util;
031:
032: import java.io.UnsupportedEncodingException;
033:
034: import org.apache.commons.codec.net.URLCodec;
035: import org.apache.commons.httpclient.HttpClientError;
036: import org.apache.commons.httpclient.NameValuePair;
037: import org.apache.commons.logging.Log;
038: import org.apache.commons.logging.LogFactory;
039:
040: /**
041: * The home for utility methods that handle various encoding tasks.
042: *
043: * @author Michael Becke
044: * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
045: *
046: * @since 2.0 final
047: */
048: public class EncodingUtil {
049:
050: /** Default content encoding chatset */
051: private static final String DEFAULT_CHARSET = "ISO-8859-1";
052:
053: /** Log object for this class. */
054: private static final Log LOG = LogFactory
055: .getLog(EncodingUtil.class);
056:
057: /**
058: * Form-urlencoding routine.
059: *
060: * The default encoding for all forms is `application/x-www-form-urlencoded'.
061: * A form data set is represented in this media type as follows:
062: *
063: * The form field names and values are escaped: space characters are replaced
064: * by `+', and then reserved characters are escaped as per [URL]; that is,
065: * non-alphanumeric characters are replaced by `%HH', a percent sign and two
066: * hexadecimal digits representing the ASCII code of the character. Line breaks,
067: * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
068: *
069: * <p>
070: * if the given charset is not supported, ISO-8859-1 is used instead.
071: * </p>
072: *
073: * @param pairs the values to be encoded
074: * @param charset the character set of pairs to be encoded
075: *
076: * @return the urlencoded pairs
077: *
078: * @since 2.0 final
079: */
080: public static String formUrlEncode(NameValuePair[] pairs,
081: String charset) {
082: try {
083: return doFormUrlEncode(pairs, charset);
084: } catch (UnsupportedEncodingException e) {
085: LOG.error("Encoding not supported: " + charset);
086: try {
087: return doFormUrlEncode(pairs, DEFAULT_CHARSET);
088: } catch (UnsupportedEncodingException fatal) {
089: // Should never happen. ISO-8859-1 must be supported on all JVMs
090: throw new HttpClientError("Encoding not supported: "
091: + DEFAULT_CHARSET);
092: }
093: }
094: }
095:
096: /**
097: * Form-urlencoding routine.
098: *
099: * The default encoding for all forms is `application/x-www-form-urlencoded'.
100: * A form data set is represented in this media type as follows:
101: *
102: * The form field names and values are escaped: space characters are replaced
103: * by `+', and then reserved characters are escaped as per [URL]; that is,
104: * non-alphanumeric characters are replaced by `%HH', a percent sign and two
105: * hexadecimal digits representing the ASCII code of the character. Line breaks,
106: * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
107: *
108: * @param pairs the values to be encoded
109: * @param charset the character set of pairs to be encoded
110: *
111: * @return the urlencoded pairs
112: * @throws UnsupportedEncodingException if charset is not supported
113: *
114: * @since 2.0 final
115: */
116: private static String doFormUrlEncode(NameValuePair[] pairs,
117: String charset) throws UnsupportedEncodingException {
118: StringBuffer buf = new StringBuffer();
119: for (int i = 0; i < pairs.length; i++) {
120: URLCodec codec = new URLCodec();
121: NameValuePair pair = pairs[i];
122: if (pair.getName() != null) {
123: if (i > 0) {
124: buf.append("&");
125: }
126: buf.append(codec.encode(pair.getName(), charset));
127: buf.append("=");
128: if (pair.getValue() != null) {
129: buf.append(codec.encode(pair.getValue(), charset));
130: }
131: }
132: }
133: return buf.toString();
134: }
135:
136: /**
137: * Converts the byte array of HTTP content characters to a string. If
138: * the specified charset is not supported, default system encoding
139: * is used.
140: *
141: * @param data the byte array to be encoded
142: * @param offset the index of the first byte to encode
143: * @param length the number of bytes to encode
144: * @param charset the desired character encoding
145: * @return The result of the conversion.
146: *
147: * @since 3.0
148: */
149: public static String getString(final byte[] data, int offset,
150: int length, String charset) {
151:
152: if (data == null) {
153: throw new IllegalArgumentException(
154: "Parameter may not be null");
155: }
156:
157: if (charset == null || charset.length() == 0) {
158: throw new IllegalArgumentException(
159: "charset may not be null or empty");
160: }
161:
162: try {
163: return new String(data, offset, length, charset);
164: } catch (UnsupportedEncodingException e) {
165:
166: if (LOG.isWarnEnabled()) {
167: LOG.warn("Unsupported encoding: " + charset
168: + ". System encoding used");
169: }
170: return new String(data, offset, length);
171: }
172: }
173:
174: /**
175: * Converts the byte array of HTTP content characters to a string. If
176: * the specified charset is not supported, default system encoding
177: * is used.
178: *
179: * @param data the byte array to be encoded
180: * @param charset the desired character encoding
181: * @return The result of the conversion.
182: *
183: * @since 3.0
184: */
185: public static String getString(final byte[] data, String charset) {
186: return getString(data, 0, data.length, charset);
187: }
188:
189: /**
190: * Converts the specified string to a byte array. If the charset is not supported the
191: * default system charset is used.
192: *
193: * @param data the string to be encoded
194: * @param charset the desired character encoding
195: * @return The resulting byte array.
196: *
197: * @since 3.0
198: */
199: public static byte[] getBytes(final String data, String charset) {
200:
201: if (data == null) {
202: throw new IllegalArgumentException("data may not be null");
203: }
204:
205: if (charset == null || charset.length() == 0) {
206: throw new IllegalArgumentException(
207: "charset may not be null or empty");
208: }
209:
210: try {
211: return data.getBytes(charset);
212: } catch (UnsupportedEncodingException e) {
213:
214: if (LOG.isWarnEnabled()) {
215: LOG.warn("Unsupported encoding: " + charset
216: + ". System encoding used.");
217: }
218:
219: return data.getBytes();
220: }
221: }
222:
223: /**
224: * Converts the specified string to byte array of ASCII characters.
225: *
226: * @param data the string to be encoded
227: * @return The string as a byte array.
228: *
229: * @since 3.0
230: */
231: public static byte[] getAsciiBytes(final String data) {
232:
233: if (data == null) {
234: throw new IllegalArgumentException(
235: "Parameter may not be null");
236: }
237:
238: try {
239: return data.getBytes("US-ASCII");
240: } catch (UnsupportedEncodingException e) {
241: throw new HttpClientError(
242: "HttpClient requires ASCII support");
243: }
244: }
245:
246: /**
247: * Converts the byte array of ASCII characters to a string. This method is
248: * to be used when decoding content of HTTP elements (such as response
249: * headers)
250: *
251: * @param data the byte array to be encoded
252: * @param offset the index of the first byte to encode
253: * @param length the number of bytes to encode
254: * @return The string representation of the byte array
255: *
256: * @since 3.0
257: */
258: public static String getAsciiString(final byte[] data, int offset,
259: int length) {
260:
261: if (data == null) {
262: throw new IllegalArgumentException(
263: "Parameter may not be null");
264: }
265:
266: try {
267: return new String(data, offset, length, "US-ASCII");
268: } catch (UnsupportedEncodingException e) {
269: throw new HttpClientError(
270: "HttpClient requires ASCII support");
271: }
272: }
273:
274: /**
275: * Converts the byte array of ASCII characters to a string. This method is
276: * to be used when decoding content of HTTP elements (such as response
277: * headers)
278: *
279: * @param data the byte array to be encoded
280: * @return The string representation of the byte array
281: *
282: * @since 3.0
283: */
284: public static String getAsciiString(final byte[] data) {
285: return getAsciiString(data, 0, data.length);
286: }
287:
288: /**
289: * This class should not be instantiated.
290: */
291: private EncodingUtil() {
292: }
293:
294: }
|