001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.tomcat.util.buf;
019:
020: import java.io.CharArrayWriter;
021: import java.io.IOException;
022: import java.io.Writer;
023: import java.util.BitSet;
024:
025: /** Efficient implementation for encoders.
026: * This class is not thread safe - you need one encoder per thread.
027: * The encoder will save and recycle the internal objects, avoiding
028: * garbage.
029: *
030: * You can add extra characters that you want preserved, for example
031: * while encoding a URL you can add "/".
032: *
033: * @author Costin Manolache
034: */
035: public final class UEncoder {
036:
037: private static org.apache.juli.logging.Log log = org.apache.juli.logging.LogFactory
038: .getLog(UEncoder.class);
039:
040: // Not static - the set may differ ( it's better than adding
041: // an extra check for "/", "+", etc
042: private BitSet safeChars = null;
043: private C2BConverter c2b = null;
044: private ByteChunk bb = null;
045:
046: private String encoding = "UTF8";
047: private static final int debug = 0;
048:
049: public UEncoder() {
050: initSafeChars();
051: }
052:
053: public void setEncoding(String s) {
054: encoding = s;
055: }
056:
057: public void addSafeCharacter(char c) {
058: safeChars.set(c);
059: }
060:
061: /** URL Encode string, using a specified encoding.
062: *
063: * @param buf The writer
064: * @param s string to be encoded
065: * @throws IOException If an I/O error occurs
066: */
067: public void urlEncode(Writer buf, String s) throws IOException {
068: if (c2b == null) {
069: bb = new ByteChunk(16); // small enough.
070: c2b = new C2BConverter(bb, encoding);
071: }
072:
073: for (int i = 0; i < s.length(); i++) {
074: int c = (int) s.charAt(i);
075: if (safeChars.get(c)) {
076: if (debug > 0)
077: log("Safe: " + (char) c);
078: buf.write((char) c);
079: } else {
080: if (debug > 0)
081: log("Unsafe: " + (char) c);
082: c2b.convert((char) c);
083:
084: // "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
085: // ( while UCS is 31 ). Amazing...
086: if (c >= 0xD800 && c <= 0xDBFF) {
087: if ((i + 1) < s.length()) {
088: int d = (int) s.charAt(i + 1);
089: if (d >= 0xDC00 && d <= 0xDFFF) {
090: if (debug > 0)
091: log("Unsafe: " + c);
092: c2b.convert((char) d);
093: i++;
094: }
095: }
096: }
097:
098: c2b.flushBuffer();
099:
100: urlEncode(buf, bb.getBuffer(), bb.getOffset(), bb
101: .getLength());
102: bb.recycle();
103: }
104: }
105: }
106:
107: /**
108: */
109: public void urlEncode(Writer buf, byte bytes[], int off, int len)
110: throws IOException {
111: for (int j = off; j < len; j++) {
112: buf.write('%');
113: char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
114: if (debug > 0)
115: log("Encode: " + ch);
116: buf.write(ch);
117: ch = Character.forDigit(bytes[j] & 0xF, 16);
118: if (debug > 0)
119: log("Encode: " + ch);
120: buf.write(ch);
121: }
122: }
123:
124: /**
125: * Utility funtion to re-encode the URL.
126: * Still has problems with charset, since UEncoder mostly
127: * ignores it.
128: */
129: public String encodeURL(String uri) {
130: String outUri = null;
131: try {
132: // XXX optimize - recycle, etc
133: CharArrayWriter out = new CharArrayWriter();
134: urlEncode(out, uri);
135: outUri = out.toString();
136: } catch (IOException iex) {
137: }
138: return outUri;
139: }
140:
141: // -------------------- Internal implementation --------------------
142:
143: //
144: private void init() {
145:
146: }
147:
148: private void initSafeChars() {
149: safeChars = new BitSet(128);
150: int i;
151: for (i = 'a'; i <= 'z'; i++) {
152: safeChars.set(i);
153: }
154: for (i = 'A'; i <= 'Z'; i++) {
155: safeChars.set(i);
156: }
157: for (i = '0'; i <= '9'; i++) {
158: safeChars.set(i);
159: }
160: //safe
161: safeChars.set('$');
162: safeChars.set('-');
163: safeChars.set('_');
164: safeChars.set('.');
165:
166: // Dangerous: someone may treat this as " "
167: // RFC1738 does allow it, it's not reserved
168: // safeChars.set('+');
169: //extra
170: safeChars.set('!');
171: safeChars.set('*');
172: safeChars.set('\'');
173: safeChars.set('(');
174: safeChars.set(')');
175: safeChars.set(',');
176: }
177:
178: private static void log(String s) {
179: if (log.isDebugEnabled())
180: log.debug("Encoder: " + s);
181: }
182: }
|