001: /**
002: * ===========================================
003: * JFreeReport : a free Java reporting library
004: * ===========================================
005: *
006: * Project Info: http://reporting.pentaho.org/
007: *
008: * (C) Copyright 2001-2007, by Object Refinery Ltd, Pentaho Corporation and Contributors.
009: *
010: * This library is free software; you can redistribute it and/or modify it under the terms
011: * of the GNU Lesser General Public License as published by the Free Software Foundation;
012: * either version 2.1 of the License, or (at your option) any later version.
013: *
014: * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
015: * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
016: * See the GNU Lesser General Public License for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License along with this
019: * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
020: * Boston, MA 02111-1307, USA.
021: *
022: * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
023: * in the United States and other countries.]
024: *
025: * ------------
026: * UTFEncodingUtil.java
027: * ------------
028: * (C) Copyright 2001-2007, by Object Refinery Ltd, Pentaho Corporation and Contributors.
029: */package org.jfree.report.util;
030:
031: import java.io.UnsupportedEncodingException;
032:
033: import org.jfree.report.JFreeReportBoot;
034: import org.jfree.util.Configuration;
035:
036: /**
037: * Provides a method to encode any string into a URL-safe form. Non-ASCII characters are first encoded as sequences of
038: * two or three bytes, using the UTF-8 algorithm, before being encoded as %HH escapes.
039: * <p/>
040: * Code is the public example given at http://www.w3.org/International/O-URL-code.html
041: *
042: * @author Bert Bos
043: */
044: public class UTFEncodingUtil {
045: /**
046: * Private Constructor prevents Object Creation.
047: */
048: private UTFEncodingUtil() {
049: }
050:
051: /**
052: * A lookup table.
053: */
054: private static final String[] hex = { "%00", "%01", "%02", "%03",
055: "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B",
056: "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13",
057: "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B",
058: "%1C", "%1D", "%1E", "%1F", "%20", "%21", "%22", "%23",
059: "%24", "%25", "%26", "%27", "%28", "%29", "%2A", "%2B",
060: "%2C", "%2D", "%2E", "%2F", "%30", "%31", "%32", "%33",
061: "%34", "%35", "%36", "%37", "%38", "%39", "%3A", "%3B",
062: "%3C", "%3D", "%3E", "%3F", "%40", "%41", "%42", "%43",
063: "%44", "%45", "%46", "%47", "%48", "%49", "%4A", "%4B",
064: "%4C", "%4D", "%4E", "%4F", "%50", "%51", "%52", "%53",
065: "%54", "%55", "%56", "%57", "%58", "%59", "%5A", "%5B",
066: "%5C", "%5D", "%5E", "%5F", "%60", "%61", "%62", "%63",
067: "%64", "%65", "%66", "%67", "%68", "%69", "%6A", "%6B",
068: "%6C", "%6D", "%6E", "%6F", "%70", "%71", "%72", "%73",
069: "%74", "%75", "%76", "%77", "%78", "%79", "%7A", "%7B",
070: "%7C", "%7D", "%7E", "%7F", "%80", "%81", "%82", "%83",
071: "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B",
072: "%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93",
073: "%94", "%95", "%96", "%97", "%98", "%99", "%9A", "%9B",
074: "%9C", "%9D", "%9E", "%9F", "%A0", "%A1", "%A2", "%A3",
075: "%A4", "%A5", "%A6", "%A7", "%A8", "%A9", "%AA", "%AB",
076: "%AC", "%AD", "%AE", "%AF", "%B0", "%B1", "%B2", "%B3",
077: "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB",
078: "%BC", "%BD", "%BE", "%BF", "%C0", "%C1", "%C2", "%C3",
079: "%C4", "%C5", "%C6", "%C7", "%C8", "%C9", "%CA", "%CB",
080: "%CC", "%CD", "%CE", "%CF", "%D0", "%D1", "%D2", "%D3",
081: "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB",
082: "%DC", "%DD", "%DE", "%DF", "%E0", "%E1", "%E2", "%E3",
083: "%E4", "%E5", "%E6", "%E7", "%E8", "%E9", "%EA", "%EB",
084: "%EC", "%ED", "%EE", "%EF", "%F0", "%F1", "%F2", "%F3",
085: "%F4", "%F5", "%F6", "%F7", "%F8", "%F9", "%FA", "%FB",
086: "%FC", "%FD", "%FE", "%FF" };
087:
088: /**
089: * Encode a string according to RFC 1738.
090: * <p/>
091: * <quote> "...Only alphanumerics [0-9a-zA-Z], the special characters "$-_.+!*'()," [not including the quotes - ed],
092: * and reserved characters used for their reserved purposes may be used unencoded within a URL."</quote>
093: * <p/>
094: * <ul> <li><p>The ASCII characters 'a' through 'z', 'A' through 'Z', and '0' through '9' remain the same.
095: * <p/>
096: * <li><p>The unreserved characters - _ . ! ~ * ' ( ) remain the same.
097: * <p/>
098: * <li><p>All other ASCII characters are converted into the 3-character string "%xy", where xy is the two-digit
099: * hexadecimal representation of the character code
100: * <p/>
101: * <li><p>All non-ASCII characters are encoded in two steps: first to a sequence of 2 or 3 bytes, using the UTF-8
102: * algorithm; secondly each of these bytes is encoded as "%xx". </ul>
103: *
104: * @param s The string to be encoded
105: * @return The encoded string
106: */
107: public static String encodeUTF8(final String s) {
108: final StringBuffer sbuf = new StringBuffer();
109: final char[] sChars = s.toCharArray();
110: final int len = sChars.length;
111: for (int i = 0; i < len; i++) {
112: final int ch = sChars[i];
113: if ('A' <= ch && ch <= 'Z') { // 'A'..'Z'
114: sbuf.append((char) ch);
115: } else if ('a' <= ch && ch <= 'z') { // 'a'..'z'
116: sbuf.append((char) ch);
117: } else if ('0' <= ch && ch <= '9') { // '0'..'9'
118: sbuf.append((char) ch);
119: } else if (ch == '-'
120: || ch == '_' // unreserved
121: || ch == '.' || ch == '!' || ch == '~' || ch == '*'
122: || ch == '\'' || ch == '(' || ch == ')') {
123: sbuf.append((char) ch);
124: } else if (ch <= 0x007f) { // other ASCII
125: sbuf.append(hex[ch]);
126: } else if (ch <= 0x07FF) { // non-ASCII <= 0x7FF
127: sbuf.append(hex[0xc0 | (ch >> 6)]);
128: sbuf.append(hex[0x80 | (ch & 0x3F)]);
129: } else { // 0x7FF < ch <= 0xFFFF
130: sbuf.append(hex[0xe0 | (ch >> 12)]);
131: sbuf.append(hex[0x80 | ((ch >> 6) & 0x3F)]);
132: sbuf.append(hex[0x80 | (ch & 0x3F)]);
133: }
134: }
135: return sbuf.toString();
136: }
137:
138: /**
139: * Encodes a byte-array. The array is expected to contain ASCII characters, or the result may not be valid.
140: *
141: * @param s the byte array
142: * @return the array as encoded string.
143: */
144: private static String encodeBytes(final byte[] s) {
145: final StringBuffer sbuf = new StringBuffer();
146: final int len = s.length;
147: for (int i = 0; i < len; i++) {
148: final int ch = (s[i] & 0xff);
149: if ('A' <= ch && ch <= 'Z') { // 'A'..'Z'
150: sbuf.append((char) ch);
151: } else if ('a' <= ch && ch <= 'z') { // 'a'..'z'
152: sbuf.append((char) ch);
153: } else if ('0' <= ch && ch <= '9') { // '0'..'9'
154: sbuf.append((char) ch);
155: } else if (ch == '-'
156: || ch == '_' // unreserved
157: || ch == '.' || ch == '!' || ch == '~' || ch == '*'
158: || ch == '\'' || ch == '(' || ch == ')') {
159: sbuf.append((char) ch);
160: } else { // other ASCII
161: sbuf.append(hex[ch]);
162: }
163: }
164: return sbuf.toString();
165: }
166:
167: /**
168: * Encodes thh given string using the provided encoding. The encoding must be a valid Java-encoding.
169: *
170: * @param s the string that should be encoded.
171: * @param encoding the encoding to tranform the string into bytes.
172: * @return the encoded string.
173: * @throws UnsupportedEncodingException if the specified encoding is not recognized.
174: */
175: public static String encode(final String s, final String encoding)
176: throws UnsupportedEncodingException {
177: if ("utf-8".equalsIgnoreCase(encoding)) {
178: return encodeUTF8(s);
179: }
180:
181: return encodeBytes(s.getBytes(encoding));
182: }
183:
184: /**
185: * Encodes the given string using the encoding defined in the report configuration. The encoding will be read from the
186: * global report configuration using the key "org.jfree.report.URLEncoding" as key. The encoding must be a valid
187: * Java-encoding.
188: *
189: * @param s the string that should be encoded.
190: * @return the encoded string.
191: * @throws UnsupportedEncodingException if the specified encoding is not recognized.
192: */
193: public static String encode(final String s)
194: throws UnsupportedEncodingException {
195: final Configuration configuration = JFreeReportBoot
196: .getInstance().getGlobalConfig();
197: final String encoding = configuration
198: .getConfigProperty("org.jfree.report.URLEncoding");
199: return encode(s, encoding);
200:
201: }
202:
203: /**
204: * Decodes the given string using the provided encoding. The encoding must be a valid
205: * Java-encoding.
206: *
207: * @param s the string that should be encoded.
208: * @param encoding the encoding to tranform the bytes into a string.
209: * @return the encoded string.
210: * @throws UnsupportedEncodingException if the specified encoding is not recognized.
211: */
212: public static String decode(final String s, final String encoding)
213: throws UnsupportedEncodingException {
214: if ("utf-8".equalsIgnoreCase(encoding)) {
215: return decodeUTF(s);
216: }
217: // the resulting string will never be greater than the encoded string
218: final byte[] result = new byte[s.length()];
219: final char[] chars = s.toCharArray();
220: int position = 0;
221:
222: for (int i = 0; i < chars.length; i++) {
223: final char ch = chars[i];
224: final int b;
225: switch (ch) {
226: case '%':
227: final char lch = s.charAt(++i);
228: final int hb = (Character.isDigit(lch) ? lch - '0'
229: : 10 + Character.toLowerCase(lch) - 'a') & 0xF;
230: final char hch = s.charAt(++i);
231: final int lb = (Character.isDigit(hch) ? hch - '0'
232: : 10 + Character.toLowerCase(hch) - 'a') & 0xF;
233: b = (hb << 4) | lb;
234: break;
235: case '+':
236: b = ' ';
237: break;
238: default:
239: b = ch;
240: }
241: result[position] = (byte) b;
242: position += 1;
243: }
244: return new String(result, 0, position, encoding);
245: }
246:
247: /**
248: * Decodes the given string using the encoding UTF-8.
249: *
250: * @param s the string that should be encoded.
251: * @return the encoded string.
252: */
253: public static String decodeUTF(final String s) {
254: final StringBuffer sbuf = new StringBuffer();
255: final char[] chars = s.toCharArray();
256: final int l = chars.length;
257: int sumb = 0;
258: for (int i = 0, more = -1; i < l; i++) {
259: /* Get next byte b from URL segment s */
260: final int ch = chars[i];
261: final int b;
262: switch (ch) {
263: case '%':
264: final char lch = s.charAt(++i);
265: final int hb = (Character.isDigit(lch) ? lch - '0'
266: : 10 + Character.toLowerCase(lch) - 'a') & 0xF;
267: final char hch = s.charAt(++i);
268: final int lb = (Character.isDigit(hch) ? hch - '0'
269: : 10 + Character.toLowerCase(hch) - 'a') & 0xF;
270: b = (hb << 4) | lb;
271: break;
272: case '+':
273: b = ' ';
274: break;
275: default:
276: b = ch;
277: }
278: /* Decode byte b as UTF-8, sumb collects incomplete chars */
279: if ((b & 0xc0) == 0x80) { // 10xxxxxx (continuation byte)
280: sumb = (sumb << 6) | (b & 0x3f); // Add 6 bits to sumb
281: if (--more == 0) {
282: sbuf.append((char) sumb); // Add char to sbuf
283: }
284: } else if ((b & 0x80) == 0x00) { // 0xxxxxxx (yields 7 bits)
285: sbuf.append((char) b); // Store in sbuf
286: } else if ((b & 0xe0) == 0xc0) { // 110xxxxx (yields 5 bits)
287: sumb = b & 0x1f;
288: more = 1; // Expect 1 more byte
289: } else if ((b & 0xf0) == 0xe0) { // 1110xxxx (yields 4 bits)
290: sumb = b & 0x0f;
291: more = 2; // Expect 2 more bytes
292: } else if ((b & 0xf8) == 0xf0) { // 11110xxx (yields 3 bits)
293: sumb = b & 0x07;
294: more = 3; // Expect 3 more bytes
295: } else if ((b & 0xfc) == 0xf8) { // 111110xx (yields 2 bits)
296: sumb = b & 0x03;
297: more = 4; // Expect 4 more bytes
298: } else /*if ((b & 0xfe) == 0xfc)*/
299: { // 1111110x (yields 1 bit)
300: sumb = b & 0x01;
301: more = 5; // Expect 5 more bytes
302: }
303: /* We don't test if the UTF-8 encoding is well-formed */
304: }
305: return sbuf.toString();
306: }
307: }
|