001: /* ====================================================================
002: * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
003: *
004: * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions
008: * are met:
009: *
010: * 1. Redistributions of source code must retain the above copyright
011: * notice, this list of conditions and the following disclaimer.
012: *
013: * 2. Redistributions in binary form must reproduce the above copyright
014: * notice, this list of conditions and the following disclaimer in
015: * the documentation and/or other materials provided with the
016: * distribution.
017: *
018: * 3. The end-user documentation included with the redistribution,
019: * if any, must include the following acknowledgment:
020: * "This product includes software developed by Jcorporate Ltd.
021: * (http://www.jcorporate.com/)."
022: * Alternately, this acknowledgment may appear in the software itself,
023: * if and wherever such third-party acknowledgments normally appear.
024: *
025: * 4. "Jcorporate" and product names such as "Expresso" must
026: * not be used to endorse or promote products derived from this
027: * software without prior written permission. For written permission,
028: * please contact info@jcorporate.com.
029: *
030: * 5. Products derived from this software may not be called "Expresso",
031: * or other Jcorporate product names; nor may "Expresso" or other
032: * Jcorporate product names appear in their name, without prior
033: * written permission of Jcorporate Ltd.
034: *
035: * 6. No product derived from this software may compete in the same
036: * market space, i.e. framework, without prior written permission
037: * of Jcorporate Ltd. For written permission, please contact
038: * partners@jcorporate.com.
039: *
040: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
041: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
042: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
043: * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
044: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
045: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
046: * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
047: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
048: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
049: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
050: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
051: * SUCH DAMAGE.
052: * ====================================================================
053: *
054: * This software consists of voluntary contributions made by many
055: * individuals on behalf of the Jcorporate Ltd. Contributions back
056: * to the project(s) are encouraged when you make modifications.
057: * Please send them to support@jcorporate.com. For more information
058: * on Jcorporate Ltd. and its products, please see
059: * <http://www.jcorporate.com/>.
060: *
061: * Portions of this software are based upon other open source
062: * products and are subject to their respective licenses.
063: */
064:
065: package com.jcorporate.expresso.core.misc;
066:
067: /**
068: * URLUTF8Encoder.java
069: *
070: * Copyright 2001 Jcorporate Ltd.
071: */
072:
073: import com.jcorporate.expresso.kernel.util.FastStringBuffer;
074:
075: import java.lang.ref.SoftReference;
076: import java.util.ArrayList;
077: import java.util.Iterator;
078:
/**
 * Encodes strings into, and decodes them from, the URL-safe
 * "x-www-form-urlencoded" form.
 * <p>
 * Characters outside the ASCII range are first converted to their UTF-8
 * byte sequence (two, three or, for supplementary characters, four bytes)
 * and every byte is then written as a "%xx" escape.
 * </p>
 * <p>
 * All methods are static and keep no mutable state, so they may be called
 * from any number of threads concurrently.
 * </p>
 */
public class URLUTF8Encoder {

    /**
     * Escape strings "%00" through "%ff", indexed by byte value.
     * Lower-case hex digits are used, as this class has always emitted them.
     */
    private static final String[] HEX = buildHexTable();

    /**
     * Character (U+FFFD) substituted for escaped bytes that do not form a
     * structurally valid UTF-8 sequence.
     */
    private static final char REPLACEMENT_CHAR = '\uFFFD';

    private URLUTF8Encoder() {
    } // no instantiations

    /**
     * Encode a string to the "x-www-form-urlencoded" form, enhanced
     * with the UTF-8-in-URL proposal. This is what happens:
     * <ul>
     * <li>The ASCII characters 'a' through 'z', 'A' through 'Z',
     * and '0' through '9' remain the same.</li>
     * <li>The space character ' ' is converted into a plus sign '+'.</li>
     * <li>All other ASCII characters are converted into the
     * 3-character string "%xy", where xy is the two-digit lower-case
     * hexadecimal representation of the character code.</li>
     * <li>All non-ASCII characters are encoded in two steps: first
     * to a sequence of 2, 3 or 4 bytes, using the UTF-8 algorithm;
     * secondly each of these bytes is encoded as "%xy". A surrogate
     * pair is encoded as the single supplementary character it
     * represents (4 bytes); an unpaired surrogate is encoded on its
     * own as 3 bytes.</li>
     * </ul>
     *
     * @param s The string to be encoded, may be <code>null</code>
     * @return The encoded string, or <code>null</code> if <code>s</code>
     *         was <code>null</code>
     */
    public static String encode(String s) {
        if (s == null) {
            return null; // mirror decode(null)
        }

        final int len = s.length();
        final StringBuffer out = new StringBuffer(len + 16);

        for (int i = 0; i < len; i++) {
            int cp = s.charAt(i);

            // Merge a well-formed surrogate pair into one code point so it is
            // written as a single 4-byte UTF-8 sequence.
            if (cp >= 0xD800 && cp <= 0xDBFF && i + 1 < len) {
                final int low = s.charAt(i + 1);
                if (low >= 0xDC00 && low <= 0xDFFF) {
                    cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                    i++; // the low surrogate has been consumed as well
                }
            }

            if (('A' <= cp && cp <= 'Z')
                    || ('a' <= cp && cp <= 'z')
                    || ('0' <= cp && cp <= '9')) {
                out.append((char) cp);
            } else if (cp == ' ') {
                out.append('+');
            } else if (cp <= 0x7F) { // other ASCII
                out.append(HEX[cp]);
            } else if (cp <= 0x7FF) { // 2-byte sequence
                out.append(HEX[0xC0 | (cp >> 6)]);
                out.append(HEX[0x80 | (cp & 0x3F)]);
            } else if (cp <= 0xFFFF) { // 3-byte sequence
                out.append(HEX[0xE0 | (cp >> 12)]);
                out.append(HEX[0x80 | ((cp >> 6) & 0x3F)]);
                out.append(HEX[0x80 | (cp & 0x3F)]);
            } else { // 4-byte sequence (supplementary character)
                out.append(HEX[0xF0 | (cp >> 18)]);
                out.append(HEX[0x80 | ((cp >> 12) & 0x3F)]);
                out.append(HEX[0x80 | ((cp >> 6) & 0x3F)]);
                out.append(HEX[0x80 | (cp & 0x3F)]);
            }
        }

        return out.toString();
    } /* encode(String) */

    /**
     * Decode a string produced by {@link #encode(String)} (or any other
     * "x-www-form-urlencoded" text whose escapes are UTF-8 bytes).
     * <ul>
     * <li>Whitespace surrounding the encoded input is ignored. Whitespace
     * that was itself encoded ('+' or a "%xy" escape) is preserved.</li>
     * <li>'+' becomes a space, "%xy" becomes the byte xy (hex digits in
     * either case), and the resulting bytes are interpreted as UTF-8
     * sequences of 1 to 4 bytes.</li>
     * <li>Any other character, ASCII or not, is copied unchanged.</li>
     * <li>Escaped bytes that do not form a structurally valid UTF-8
     * sequence are replaced by U+FFFD. Overlong forms and encoded
     * surrogates are not rejected, so output of older versions of this
     * class still decodes.</li>
     * <li>A "%" followed by exactly one character at the very end of the
     * input yields that character.</li>
     * </ul>
     *
     * @param s the string to decode, may be <code>null</code>
     * @return the decoded string, or <code>null</code> if <code>s</code>
     *         was <code>null</code>
     * @throws IllegalArgumentException if the input ends with a bare '%'
     *         or contains a "%xy" escape whose digits are not hexadecimal
     */
    public static String decode(String s) {
        if (s == null) {
            return null;
        }

        final String in = s.trim();
        final int len = in.length();
        final StringBuffer out = new StringBuffer(len);

        int codePoint = 0; // bits collected for the multi-byte sequence in progress
        int remaining = 0; // continuation bytes that sequence still needs

        for (int i = 0; i < len; i++) {
            final char c = in.charAt(i);

            if (c == '%' && i + 2 < len) {
                final int high = hexValue(in.charAt(i + 1));
                final int low = hexValue(in.charAt(i + 2));
                if (high < 0 || low < 0) {
                    throw new IllegalArgumentException("Invalid escape '"
                            + in.substring(i, i + 3) + "' at index " + i
                            + " while decoding string '" + in + "'");
                }
                final int b = (high << 4) | low;
                i += 2;

                if ((b & 0xC0) == 0x80) { // 10xxxxxx: continuation byte
                    if (remaining == 0) {
                        out.append(REPLACEMENT_CHAR); // nothing to continue
                    } else {
                        codePoint = (codePoint << 6) | (b & 0x3F);
                        if (--remaining == 0) {
                            appendCodePoint(out, codePoint);
                        }
                    }
                    continue;
                }

                // Any other byte starts something new; a sequence still
                // waiting for continuation bytes was cut short.
                if (remaining > 0) {
                    out.append(REPLACEMENT_CHAR);
                    remaining = 0;
                }
                if (b < 0x80) { // 0xxxxxxx: plain ASCII
                    out.append((char) b);
                } else if ((b & 0xE0) == 0xC0) { // 110xxxxx
                    codePoint = b & 0x1F;
                    remaining = 1;
                } else if ((b & 0xF0) == 0xE0) { // 1110xxxx
                    codePoint = b & 0x0F;
                    remaining = 2;
                } else if ((b & 0xF8) == 0xF0) { // 11110xxx
                    codePoint = b & 0x07;
                    remaining = 3;
                } else { // 0xF8..0xFF never occur in UTF-8
                    out.append(REPLACEMENT_CHAR);
                }
                continue;
            }

            // From here on exactly one literal character is emitted, which
            // also terminates any unfinished multi-byte sequence.
            if (remaining > 0) {
                out.append(REPLACEMENT_CHAR);
                remaining = 0;
            }
            if (c == '%') {
                if (i + 1 >= len) {
                    throw new IllegalArgumentException(
                            "Index out of bounds while " + "decoding string '"
                            + in + "' (length " + len + ")");
                }
                // "%X" at the very end of the input: keep X as it is
                out.append(in.charAt(++i));
            } else if (c == '+') {
                out.append(' ');
            } else {
                out.append(c);
            }
        }

        if (remaining > 0) { // input ended in the middle of a sequence
            out.append(REPLACEMENT_CHAR);
        }

        return out.toString();
    } /* decode(String) */

    /**
     * Self-test: round-trips a set of sample strings through
     * {@link #encode(String)} and {@link #decode(String)} and reports every
     * mismatch on standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String[] testStrings = {
            "this is a test",
            "Now\nWe\nGet\tMore%04Complicated",
            "%Leading percent",
            "Trailing%",
            "%Leading and trailing%",
            "Even@$%%More!&Comlicated^@%",
            "Even@$%%More!&C|omlic|ated^@%",
            "|Even@$%%More!&C|omlic|ated^@%|",
            "LoginName|Admin%|",
            "|Even@$%%More!&C|omld^@%||Even@$%%More!&C|oml"
                    + "icic|ated^@%||Even@$%%More!&C|omlic|ated^@%||Even@$%%M"
                    + "ore!&C|omlic|ated^@%|",
            "",
            " leading and trailing blanks ",
            "\u00e9\u20ac",
            "\uD83D\uDE00"
        };

        for (int i = 0; i < testStrings.length; i++) {
            final String testString = testStrings[i];
            final String encoded = encode(testString);
            final String decoded = decode(encoded);

            if (!decoded.equals(testString)) {
                System.out.println("Error encoding/decoding string '"
                        + testString + "' (length "
                        + testString.length() + "). Encoded as '"
                        + encoded + "' (length " + encoded.length()
                        + ") and decoded to '" + decoded + "' (length "
                        + decoded.length() + ")");
            }
        }

        System.out.println("Tests Complete");
    }

    /**
     * Builds the table that maps every byte value 0..255 to its "%xy" escape.
     */
    private static String[] buildHexTable() {
        final char[] digits = "0123456789abcdef".toCharArray();
        final String[] table = new String[256];

        for (int i = 0; i < table.length; i++) {
            table[i] = new String(
                    new char[] { '%', digits[i >> 4], digits[i & 0x0F] });
        }

        return table;
    }

    /**
     * Returns the value of an ASCII hexadecimal digit, or -1 if the
     * character is not one. Non-ASCII digits are rejected on purpose.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Appends a decoded code point: directly when it fits in one char, as a
     * surrogate pair when it is supplementary, and as U+FFFD when it lies
     * beyond the Unicode range.
     */
    private static void appendCodePoint(StringBuffer out, int cp) {
        if (cp <= 0xFFFF) {
            out.append((char) cp);
        } else if (cp <= 0x10FFFF) {
            final int offset = cp - 0x10000;
            out.append((char) (0xD800 + (offset >> 10)));
            out.append((char) (0xDC00 + (offset & 0x3FF)));
        } else {
            out.append(REPLACEMENT_CHAR);
        }
    }
}
351:
352: /* URLUTF8Encoder */
|