001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: *
017: * $Header:$
018: */
019: package org.apache.beehive.netui.pageflow.scoping.internal;
020:
021: import java.util.Map;
022: import java.util.StringTokenizer;
023: import java.io.UnsupportedEncodingException;
024:
/**
 * Helpers for parsing a URI query string into decoded name/value pairs.
 */
class ParseUtils {

    /** Encoding used when the caller supplies none, and as the last-resort fallback. */
    private static final String WLS_DEFAULT_ENCODING = "ISO-8859-1";

    //-------------------------------------------------------------------------------------------------
    // helpers to parse the query string.

    /**
     * Parses an RFC1630 query string into an existing Map.
     *
     * <p>Anything from the first <code>'#'</code> onwards is treated as a fragment and ignored.
     * Parameters are separated by <code>'&amp;'</code>; each one is split at its <em>first</em>
     * <code>'='</code>, so a value may itself contain <code>'='</code>. A parameter without
     * <code>'='</code>, or with nothing after it, is stored with the empty string as its value.
     * Names and values are percent-decoded before being stored; a later occurrence of a name
     * overwrites an earlier one.</p>
     *
     * @param str Query string; <code>null</code> is treated as containing no parameters
     * @param res Map into which insert the values.
     * @param encoding Encoding to be used for stored Strings
     */
    public static void parseQueryString(String str, Map res,
                                        String encoding) {
        if (str == null) {
            return;
        }

        // Drop the fragment, including the case where '#' is the very first character.
        int fragmentStart = str.indexOf('#');
        if (fragmentStart >= 0) {
            str = str.substring(0, fragmentStart);
        }

        // "Within the query string, the plus sign is reserved as
        // shorthand notation for a space. Therefore, real plus signs must
        // be encoded. This method was used to make query URIs easier to
        // pass in systems which did not allow spaces." -- RFC 1630
        // The replacement happens before percent-decoding so that an escaped
        // plus (%2B) still decodes to a literal '+'.
        StringTokenizer st = new StringTokenizer(str.replace('+', ' '), "&");

        while (st.hasMoreTokens()) {
            String qp = st.nextToken();

            // Split on the first '=' only. String.split("=") would discard a trailing
            // empty value ("a=") and cut a value at any later '=' ("a=b=c").
            int eq = qp.indexOf('=');
            String name = (eq < 0) ? qp : qp.substring(0, eq);
            String value = (eq < 0) ? "" : qp.substring(eq + 1);

            res.put(unescape(name, encoding), unescape(value, encoding));
        }
    }

    /**
     * URI-unescapes the specified string, except for +/&lt;space&gt;
     * encoding.
     *
     * <p>Each <code>%XX</code> sequence with two valid hex digits is replaced by the byte it
     * denotes; a <code>'%'</code> that is not followed by two hex digits is copied through
     * unchanged. The resulting bytes are then turned into a String using
     * <code>encoding</code>.</p>
     *
     * @param str String to be unescaped
     * @param encoding The name of a character encoding; <code>null</code> or empty selects
     *                 ISO-8859-1
     * @return Unescaped string
     */
    private static String unescape(String str, String encoding) {
        // We cannot unescape '+' to space here because '+' is allowed in the file name.

        // If the str does not contain "%", we don't need to do anything.
        if (str.indexOf('%') < 0) {
            return str;
        }

        if (encoding == null || encoding.length() == 0) {
            encoding = WLS_DEFAULT_ENCODING;
        }

        // Do not assume String only contains ascii. str.length() <= str.getBytes().length
        // The array is decoded in place: 'out' never runs ahead of 'in'.
        byte[] strbytes = str.getBytes();
        int len = strbytes.length;
        int out = 0;

        boolean foundNonAscii = false;
        for (int in = 0; in < len; in++, out++) {
            if (strbytes[in] == '%' && (in + 2 < len)
                    && Hex.isHexChar(strbytes[in + 1])
                    && Hex.isHexChar(strbytes[in + 2])) {
                strbytes[out] = (byte) ((Hex.hexValueOf(strbytes[in + 1]) << 4)
                        + Hex.hexValueOf(strbytes[in + 2]));
                in += 2;
                continue;
            }
            // IE takes non-ASCII URLs. We use the default encoding
            // if non-ASCII characters are contained in URLs.
            // byte is signed, so every byte >= 0x80 is negative and satisfies "<= 0x1f";
            // raw ASCII control characters and DEL match this test as well.
            if (!foundNonAscii
                    && (strbytes[in] <= 0x1f || strbytes[in] == 0x7f)) {
                encoding = System.getProperty("file.encoding");
                foundNonAscii = true;
            }
            strbytes[out] = strbytes[in];
        }

        return newString(strbytes, 0, out, encoding); // was: BytesToString.newString(...)
    }

    /**
     * Builds a String from a byte range using the given encoding, falling back to
     * ISO-8859-1 when the encoding is <code>null</code>, an 8-bit Unicode subset, or
     * not supported by the JVM.
     *
     * @param b source bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @param enc name of the character encoding, may be <code>null</code>
     * @return the decoded String
     */
    private static String newString(byte b[], int offset, int length,
                                    String enc) {
        if (is8BitUnicodeSubset(enc)) {
            return getString(b, offset, length);
        }
        try {
            return new String(b, offset, length, enc);
        } catch (UnsupportedEncodingException uee) {
            // Unknown encoding name: fall back to the default rather than failing the parse.
            return getString(b, offset, length);
        }
    }

    /**
     * Tells whether the named encoding is handled by the ISO-8859-1 decoding path.
     *
     * @param enc encoding name, may be <code>null</code>
     * @return <code>true</code> for <code>null</code>, "ISO-8859-1", "ISO8859_1" or "ASCII"
     *         (compared case-insensitively)
     */
    private static boolean is8BitUnicodeSubset(String enc) {
        return enc == null || "ISO-8859-1".equalsIgnoreCase(enc)
                || "ISO8859_1".equalsIgnoreCase(enc)
                || "ASCII".equalsIgnoreCase(enc);
    }

    /**
     * Decodes a byte range as ISO-8859-1.
     *
     * @param b source bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @return the decoded String
     */
    private static String getString(byte b[], int offset, int length) {
        try {
            return new String(b, offset, length, WLS_DEFAULT_ENCODING);
        } catch (UnsupportedEncodingException uee) {
            // every JVM is supposed to support ISO-8859-1
            throw new AssertionError(uee);
        }
    }

    /**
     * Hexadecimal digit helpers used by the percent-decoder.
     */
    static class Hex {

        // this class exists only for its static methods
        private Hex() {
        }

        /**
         * Returns the numeric value of a hexadecimal digit.
         *
         * @param c character code of the digit (either letter case is accepted)
         * @return a value in the range 0-15, or 0 when <code>c</code> is not a hex digit
         */
        public static int hexValueOf(int c) {
            if (c >= '0' && c <= '9') {
                return c - '0';
            }
            if (c >= 'a' && c <= 'f') {
                return c - 'a' + 10;
            }
            if (c >= 'A' && c <= 'F') {
                return c - 'A' + 10;
            }
            return 0;
        }

        /**
         * Test a character to see whether it is a possible hex char.
         *
         * @param c char (int actually) to test.
         * @return <code>true</code> if <code>c</code> is one of 0-9, a-f or A-F
         */
        public static final boolean isHexChar(int c) {
            return (c >= '0' && c <= '9')
                    || (c >= 'a' && c <= 'f')
                    || (c >= 'A' && c <= 'F');
        }

    }
}
|