001: package com.sun.portal.netlet.util;
002:
003: import java.io.BufferedWriter;
004: import java.io.ByteArrayOutputStream;
005: import java.io.IOException;
006: import java.io.OutputStreamWriter;
007: import java.util.BitSet;
008:
/**
 * URL-encodes strings using a caller-supplied character encoding.
 *
 * java.net.URLEncoder in JDK 1.3 always uses the platform default encoding,
 * which does not handle multibyte (for example Japanese) text correctly. This
 * class follows the approach of the JDK 1.4 URLEncoder, where the encoding is
 * a parameter.
 *
 * Output format: ASCII letters, digits and the characters '-', '_', '.' and
 * '*' are copied unchanged; a space becomes '+'; every other character is
 * converted to bytes in the requested encoding and each byte is written as
 * "%XY" with upper-case hexadecimal digits.
 *
 * Input: the string to be translated and the character encoding name.
 * Output: the translated string.
 */
public class UrlEncoder {
    /**
     * Characters that are copied to the output without percent-escaping. The
     * space character is a member of this set, but encode() emits it as '+'.
     */
    static BitSet dontNeedEncoding;

    /** Distance between a lower-case ASCII letter and its upper-case form. */
    static final int caseDiff = ('a' - 'A');

    /** Encoding used by the single-argument encode(String) overload. */
    private static final String defaultEncoding = "UTF8";

    static {
        dontNeedEncoding = new BitSet(256);
        int i;
        for (i = 'a'; i <= 'z'; i++) {
            dontNeedEncoding.set(i);
        }
        for (i = 'A'; i <= 'Z'; i++) {
            dontNeedEncoding.set(i);
        }
        for (i = '0'; i <= '9'; i++) {
            dontNeedEncoding.set(i);
        }
        // The space-to-'+' translation itself happens in encode().
        dontNeedEncoding.set(' ');
        dontNeedEncoding.set('-');
        dontNeedEncoding.set('_');
        dontNeedEncoding.set('.');
        dontNeedEncoding.set('*');
    }

    /** Static utility class; not instantiable. */
    private UrlEncoder() {
    }

    /**
     * Encodes a string using the default encoding (UTF-8).
     *
     * @param s the string to encode; may be null
     * @return the encoded string, or null if s is null
     */
    public static String encode(String s) {
        return encode(s, defaultEncoding);
    }

    /**
     * Encodes a string, converting characters that need escaping to bytes in
     * the given encoding and writing each byte as "%XY".
     *
     * Each maximal run of characters that need escaping is converted to bytes
     * in a single step, so a surrogate pair is always encoded as one unit and
     * the charset encoder is finalized at the end of every run. An unpaired
     * surrogate is therefore not lost; with the JDK's encoders it is replaced
     * by the charset's substitution bytes (for UTF-8 this yields "%3F").
     *
     * This method never throws. If the encoding name is null or not
     * supported, the stack trace is printed and the input string is returned
     * unchanged.
     *
     * @param s the string to encode; may be null
     * @param enc the name of the character encoding to use
     * @return the encoded string; the original string instance if nothing had
     *         to be changed or the encoding could not be used; null if s is
     *         null
     */
    public static String encode(String s, String enc) {
        if (s == null) {
            // Nothing to encode; avoid using an exception as control flow.
            return null;
        }
        try {
            // Probe the encoding first so that a bad name is reported (and the
            // input returned untouched) even when no character needs escaping.
            "".getBytes(enc);

            int length = s.length();
            boolean needToChange = false;
            StringBuffer out = new StringBuffer(length);

            int pos = 0;
            while (pos < length) {
                char c = s.charAt(pos);
                if (dontNeedEncoding.get(c)) {
                    if (c == ' ') {
                        c = '+';
                        needToChange = true;
                    }
                    out.append(c);
                    pos++;
                } else {
                    // Gather the whole run of characters that need escaping.
                    // Both halves of a surrogate pair are outside the safe
                    // set, so a pair can never be split across two runs.
                    int runStart = pos;
                    do {
                        pos++;
                    } while (pos < length
                            && !dontNeedEncoding.get(s.charAt(pos)));

                    byte[] encoded = s.substring(runStart, pos).getBytes(enc);
                    for (int j = 0; j < encoded.length; j++) {
                        appendEscapedByte(out, encoded[j]);
                    }
                    needToChange = true;
                }
            }

            return (needToChange ? out.toString() : s);
        } catch (Exception e) {
            // Deliberately broad: callers rely on this method never throwing.
            // In practice this is reached for a null or unsupported encoding.
            e.printStackTrace();
            return s;
        }
    }

    /** Appends one byte to out as '%' followed by two upper-case hex digits. */
    private static void appendEscapedByte(StringBuffer out, byte b) {
        out.append('%');
        out.append(upperHexDigit((b >> 4) & 0xF));
        out.append(upperHexDigit(b & 0xF));
    }

    /** Returns the upper-case hexadecimal digit for a value in 0..15. */
    private static char upperHexDigit(int nibble) {
        char ch = Character.forDigit(nibble, 16);
        if (Character.isLetter(ch)) {
            ch -= caseDiff;
        }
        return ch;
    }
}
|