001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.tomcat.util.buf;
019:
020: import java.io.CharConversionException;
021: import java.io.IOException;
022:
023: /**
024: * All URL decoding happens here. This way we can reuse, review, optimize
025: * without adding complexity to the buffers.
026: *
027: * The conversion will modify the original buffer.
028: *
029: * @author Costin Manolache
030: */
031: public final class UDecoder {
032:
033: private static org.apache.juli.logging.Log log = org.apache.juli.logging.LogFactory
034: .getLog(UDecoder.class);
035:
036: protected static final boolean ALLOW_ENCODED_SLASH = Boolean
037: .valueOf(
038: System
039: .getProperty(
040: "org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH",
041: "false")).booleanValue();
042:
043: public UDecoder() {
044: }
045:
046: /** URLDecode, will modify the source. Includes converting
047: * '+' to ' '.
048: */
049: public void convert(ByteChunk mb) throws IOException {
050: convert(mb, true);
051: }
052:
053: /** URLDecode, will modify the source.
054: */
055: public void convert(ByteChunk mb, boolean query) throws IOException {
056: int start = mb.getOffset();
057: byte buff[] = mb.getBytes();
058: int end = mb.getEnd();
059:
060: int idx = ByteChunk.indexOf(buff, start, end, '%');
061: int idx2 = -1;
062: if (query)
063: idx2 = ByteChunk.indexOf(buff, start, end, '+');
064: if (idx < 0 && idx2 < 0) {
065: return;
066: }
067:
068: // idx will be the smallest positive inxes ( first % or + )
069: if (idx2 >= 0 && idx2 < idx)
070: idx = idx2;
071: if (idx < 0)
072: idx = idx2;
073:
074: boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
075:
076: for (int j = idx; j < end; j++, idx++) {
077: if (buff[j] == '+' && query) {
078: buff[idx] = (byte) ' ';
079: } else if (buff[j] != '%') {
080: buff[idx] = buff[j];
081: } else {
082: // read next 2 digits
083: if (j + 2 >= end) {
084: throw new CharConversionException("EOF");
085: }
086: byte b1 = buff[j + 1];
087: byte b2 = buff[j + 2];
088: if (!isHexDigit(b1) || !isHexDigit(b2))
089: throw new CharConversionException("isHexDigit");
090:
091: j += 2;
092: int res = x2c(b1, b2);
093: if (noSlash && (res == '/')) {
094: throw new CharConversionException("noSlash");
095: }
096: buff[idx] = (byte) res;
097: }
098: }
099:
100: mb.setEnd(idx);
101:
102: return;
103: }
104:
105: // -------------------- Additional methods --------------------
106: // XXX What do we do about charset ????
107:
108: /** In-buffer processing - the buffer will be modified
109: * Includes converting '+' to ' '.
110: */
111: public void convert(CharChunk mb) throws IOException {
112: convert(mb, true);
113: }
114:
115: /** In-buffer processing - the buffer will be modified
116: */
117: public void convert(CharChunk mb, boolean query) throws IOException {
118: // log( "Converting a char chunk ");
119: int start = mb.getOffset();
120: char buff[] = mb.getBuffer();
121: int cend = mb.getEnd();
122:
123: int idx = CharChunk.indexOf(buff, start, cend, '%');
124: int idx2 = -1;
125: if (query)
126: idx2 = CharChunk.indexOf(buff, start, cend, '+');
127: if (idx < 0 && idx2 < 0) {
128: return;
129: }
130:
131: if (idx2 >= 0 && idx2 < idx)
132: idx = idx2;
133: if (idx < 0)
134: idx = idx2;
135:
136: for (int j = idx; j < cend; j++, idx++) {
137: if (buff[j] == '+' && query) {
138: buff[idx] = (' ');
139: } else if (buff[j] != '%') {
140: buff[idx] = buff[j];
141: } else {
142: // read next 2 digits
143: if (j + 2 >= cend) {
144: // invalid
145: throw new CharConversionException("EOF");
146: }
147: char b1 = buff[j + 1];
148: char b2 = buff[j + 2];
149: if (!isHexDigit(b1) || !isHexDigit(b2))
150: throw new CharConversionException("isHexDigit");
151:
152: j += 2;
153: int res = x2c(b1, b2);
154: buff[idx] = (char) res;
155: }
156: }
157: mb.setEnd(idx);
158: }
159:
160: /** URLDecode, will modify the source
161: * Includes converting '+' to ' '.
162: */
163: public void convert(MessageBytes mb) throws IOException {
164: convert(mb, true);
165: }
166:
167: /** URLDecode, will modify the source
168: */
169: public void convert(MessageBytes mb, boolean query)
170: throws IOException {
171:
172: switch (mb.getType()) {
173: case MessageBytes.T_STR:
174: String strValue = mb.toString();
175: if (strValue == null)
176: return;
177: mb.setString(convert(strValue, query));
178: break;
179: case MessageBytes.T_CHARS:
180: CharChunk charC = mb.getCharChunk();
181: convert(charC, query);
182: break;
183: case MessageBytes.T_BYTES:
184: ByteChunk bytesC = mb.getByteChunk();
185: convert(bytesC, query);
186: break;
187: }
188: }
189:
190: // XXX Old code, needs to be replaced !!!!
191: //
192: public final String convert(String str) {
193: return convert(str, true);
194: }
195:
196: public final String convert(String str, boolean query) {
197: if (str == null)
198: return null;
199:
200: if ((!query || str.indexOf('+') < 0) && str.indexOf('%') < 0)
201: return str;
202:
203: StringBuffer dec = new StringBuffer(); // decoded string output
204: int strPos = 0;
205: int strLen = str.length();
206:
207: dec.ensureCapacity(str.length());
208: while (strPos < strLen) {
209: int laPos; // lookahead position
210:
211: // look ahead to next URLencoded metacharacter, if any
212: for (laPos = strPos; laPos < strLen; laPos++) {
213: char laChar = str.charAt(laPos);
214: if ((laChar == '+' && query) || (laChar == '%')) {
215: break;
216: }
217: }
218:
219: // if there were non-metacharacters, copy them all as a block
220: if (laPos > strPos) {
221: dec.append(str.substring(strPos, laPos));
222: strPos = laPos;
223: }
224:
225: // shortcut out of here if we're at the end of the string
226: if (strPos >= strLen) {
227: break;
228: }
229:
230: // process next metacharacter
231: char metaChar = str.charAt(strPos);
232: if (metaChar == '+') {
233: dec.append(' ');
234: strPos++;
235: continue;
236: } else if (metaChar == '%') {
237: // We throw the original exception - the super will deal with
238: // it
239: // try {
240: dec.append((char) Integer.parseInt(str.substring(
241: strPos + 1, strPos + 3), 16));
242: strPos += 3;
243: }
244: }
245:
246: return dec.toString();
247: }
248:
249: private static boolean isHexDigit(int c) {
250: return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
251: }
252:
253: private static int x2c(byte b1, byte b2) {
254: int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0');
255: digit *= 16;
256: digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0');
257: return digit;
258: }
259:
260: private static int x2c(char b1, char b2) {
261: int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0');
262: digit *= 16;
263: digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0');
264: return digit;
265: }
266:
267: private final static int debug = 0;
268:
269: private static void log(String s) {
270: if (log.isDebugEnabled())
271: log.debug("URLDecoder: " + s);
272: }
273:
274: }
|