001: /*
002: * Copyright 1999-2004 The Apache Software Foundation
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.tomcat.util.buf;
018:
019: import java.io.CharConversionException;
020: import java.io.IOException;
021:
022: /**
023: * All URL decoding happens here. This way we can reuse, review, optimize
024: * without adding complexity to the buffers.
025: *
026: * The conversion will modify the original buffer.
027: *
028: * @author Costin Manolache
029: */
030: public final class UDecoder {
031:
032: public UDecoder() {
033: }
034:
035: /** URLDecode, will modify the source. Includes converting
036: * '+' to ' '.
037: */
038: public void convert(ByteChunk mb) throws IOException {
039: convert(mb, true);
040: }
041:
042: /** URLDecode, will modify the source.
043: */
044: public void convert(ByteChunk mb, boolean query) throws IOException {
045: int start = mb.getOffset();
046: byte buff[] = mb.getBytes();
047: int end = mb.getEnd();
048:
049: int idx = ByteChunk.indexOf(buff, start, end, '%');
050: int idx2 = -1;
051: if (query)
052: idx2 = ByteChunk.indexOf(buff, start, end, '+');
053: if (idx < 0 && idx2 < 0) {
054: return;
055: }
056:
057: // idx will be the smallest positive inxes ( first % or + )
058: if (idx2 >= 0 && idx2 < idx)
059: idx = idx2;
060: if (idx < 0)
061: idx = idx2;
062:
063: for (int j = idx; j < end; j++, idx++) {
064: if (buff[j] == '+' && query) {
065: buff[idx] = (byte) ' ';
066: } else if (buff[j] != '%') {
067: buff[idx] = buff[j];
068: } else {
069: // read next 2 digits
070: if (j + 2 >= end) {
071: throw new CharConversionException("EOF");
072: }
073: byte b1 = buff[j + 1];
074: byte b2 = buff[j + 2];
075: if (!isHexDigit(b1) || !isHexDigit(b2))
076: throw new CharConversionException("isHexDigit");
077:
078: j += 2;
079: int res = x2c(b1, b2);
080: buff[idx] = (byte) res;
081: }
082: }
083:
084: mb.setEnd(idx);
085:
086: return;
087: }
088:
089: // -------------------- Additional methods --------------------
090: // XXX What do we do about charset ????
091:
092: /** In-buffer processing - the buffer will be modified
093: * Includes converting '+' to ' '.
094: */
095: public void convert(CharChunk mb) throws IOException {
096: convert(mb, true);
097: }
098:
099: /** In-buffer processing - the buffer will be modified
100: */
101: public void convert(CharChunk mb, boolean query) throws IOException {
102: // log( "Converting a char chunk ");
103: int start = mb.getOffset();
104: char buff[] = mb.getBuffer();
105: int cend = mb.getEnd();
106:
107: int idx = CharChunk.indexOf(buff, start, cend, '%');
108: int idx2 = -1;
109: if (query)
110: idx2 = CharChunk.indexOf(buff, start, cend, '+');
111: if (idx < 0 && idx2 < 0) {
112: return;
113: }
114:
115: if (idx2 >= 0 && idx2 < idx)
116: idx = idx2;
117: if (idx < 0)
118: idx = idx2;
119:
120: for (int j = idx; j < cend; j++, idx++) {
121: if (buff[j] == '+' && query) {
122: buff[idx] = (' ');
123: } else if (buff[j] != '%') {
124: buff[idx] = buff[j];
125: } else {
126: // read next 2 digits
127: if (j + 2 >= cend) {
128: // invalid
129: throw new CharConversionException("EOF");
130: }
131: char b1 = buff[j + 1];
132: char b2 = buff[j + 2];
133: if (!isHexDigit(b1) || !isHexDigit(b2))
134: throw new CharConversionException("isHexDigit");
135:
136: j += 2;
137: int res = x2c(b1, b2);
138: buff[idx] = (char) res;
139: }
140: }
141: mb.setEnd(idx);
142: }
143:
144: /** URLDecode, will modify the source
145: * Includes converting '+' to ' '.
146: */
147: public void convert(MessageBytes mb) throws IOException {
148: convert(mb, true);
149: }
150:
151: /** URLDecode, will modify the source
152: */
153: public void convert(MessageBytes mb, boolean query)
154: throws IOException {
155:
156: switch (mb.getType()) {
157: case MessageBytes.T_STR:
158: String strValue = mb.toString();
159: if (strValue == null)
160: return;
161: mb.setString(convert(strValue, query));
162: break;
163: case MessageBytes.T_CHARS:
164: CharChunk charC = mb.getCharChunk();
165: convert(charC, query);
166: break;
167: case MessageBytes.T_BYTES:
168: ByteChunk bytesC = mb.getByteChunk();
169: convert(bytesC, query);
170: break;
171: }
172: }
173:
174: // XXX Old code, needs to be replaced !!!!
175: //
176: public final String convert(String str) {
177: return convert(str, true);
178: }
179:
180: public final String convert(String str, boolean query) {
181: if (str == null)
182: return null;
183:
184: if ((!query || str.indexOf('+') < 0) && str.indexOf('%') < 0)
185: return str;
186:
187: StringBuffer dec = new StringBuffer(); // decoded string output
188: int strPos = 0;
189: int strLen = str.length();
190:
191: dec.ensureCapacity(str.length());
192: while (strPos < strLen) {
193: int laPos; // lookahead position
194:
195: // look ahead to next URLencoded metacharacter, if any
196: for (laPos = strPos; laPos < strLen; laPos++) {
197: char laChar = str.charAt(laPos);
198: if ((laChar == '+' && query) || (laChar == '%')) {
199: break;
200: }
201: }
202:
203: // if there were non-metacharacters, copy them all as a block
204: if (laPos > strPos) {
205: dec.append(str.substring(strPos, laPos));
206: strPos = laPos;
207: }
208:
209: // shortcut out of here if we're at the end of the string
210: if (strPos >= strLen) {
211: break;
212: }
213:
214: // process next metacharacter
215: char metaChar = str.charAt(strPos);
216: if (metaChar == '+') {
217: dec.append(' ');
218: strPos++;
219: continue;
220: } else if (metaChar == '%') {
221: // We throw the original exception - the super will deal with
222: // it
223: // try {
224: dec.append((char) Integer.parseInt(str.substring(
225: strPos + 1, strPos + 3), 16));
226: strPos += 3;
227: }
228: }
229:
230: return dec.toString();
231: }
232:
233: private static boolean isHexDigit(int c) {
234: return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
235: }
236:
237: private static int x2c(byte b1, byte b2) {
238: int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0');
239: digit *= 16;
240: digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0');
241: return digit;
242: }
243:
244: private static int x2c(char b1, char b2) {
245: int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0');
246: digit *= 16;
247: digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0');
248: return digit;
249: }
250:
251: private final static int debug = 0;
252:
253: private static void log(String s) {
254: System.out.println("URLDecoder: " + s);
255: }
256:
257: }
|