001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package java.net;
019:
020: import java.io.ByteArrayOutputStream;
021: import java.io.UnsupportedEncodingException;
022:
023: import org.apache.harmony.luni.util.Msg;
024:
025: /**
026: * This class is used to encode a string using the format required by
027: * <code>application/x-www-form-urlencoded</code> MIME content type.
028: *
029: * It contains helper methods used by the URI class, and performs encoding and
030: * decoding in a slightly different way than URLEncoder and URLDecoder.
031: */
032: class URIEncoderDecoder {
033:
034: static final String digits = "0123456789ABCDEF"; //$NON-NLS-1$
035:
036: static final String encoding = "UTF8"; //$NON-NLS-1$
037:
038: /**
039: * Validate a string by checking if it contains any characters other than:
040: *
041: * 1. letters ('a'..'z', 'A'..'Z') 2. numbers ('0'..'9') 3. characters in
042: * the legalset parameter 4. others (Unicode characters that are not in
043: * US-ASCII set, and are not ISO Control or are not ISO Space characters)
044: * <p>
045: * called from URI.Helper.parseURI() to validate each component
046: * <p>
047: *
048: * @param s
049: * java.lang.String the string to be validated
050: * @param legal
051: * java.lang.String the characters allowed in the String s
052: *
053: */
054: static void validate(String s, String legal)
055: throws URISyntaxException {
056: for (int i = 0; i < s.length();) {
057: char ch = s.charAt(i);
058: if (ch == '%') {
059: do {
060: if (i + 2 >= s.length()) {
061: throw new URISyntaxException(s, Msg
062: .getString("K0313"), //$NON-NLS-1$
063: i);
064: }
065: int d1 = Character.digit(s.charAt(i + 1), 16);
066: int d2 = Character.digit(s.charAt(i + 2), 16);
067: if (d1 == -1 || d2 == -1) {
068: throw new URISyntaxException(s, Msg.getString(
069: "K0314", //$NON-NLS-1$
070: s.substring(i, i + 3)), i);
071: }
072:
073: i += 3;
074: } while (i < s.length() && s.charAt(i) == '%');
075:
076: continue;
077: }
078: if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
079: || (ch >= '0' && ch <= '9')
080: || legal.indexOf(ch) > -1 || (ch > 127
081: && !Character.isSpaceChar(ch) && !Character
082: .isISOControl(ch)))) {
083: throw new URISyntaxException(s,
084: Msg.getString("K00c1"), i); //$NON-NLS-1$
085: }
086: i++;
087: }
088: }
089:
090: static void validateSimple(String s, String legal)
091: throws URISyntaxException {
092: for (int i = 0; i < s.length();) {
093: char ch = s.charAt(i);
094: if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
095: || (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1)) {
096: throw new URISyntaxException(s,
097: Msg.getString("K00c1"), i); //$NON-NLS-1$
098: }
099: i++;
100: }
101: }
102:
103: /**
104: * All characters except letters ('a'..'z', 'A'..'Z') and numbers ('0'..'9')
105: * and legal characters are converted into their hexidecimal value prepended
106: * by '%'.
107: * <p>
108: * For example: '#' -> %23
109: * <p>
110: * Other characters, which are Unicode chars that are not US-ASCII, and are
111: * not ISO Control or are not ISO Space chars, are preserved.
112: * <p>
113: * Called from URI.quoteComponent() (for multiple argument constructors)
114: * <p>
115: *
116: * @param s
117: * java.lang.String the string to be converted
118: * @param legal
119: * java.lang.String the characters allowed to be preserved in the
120: * string s
121: * @return java.lang.String the converted string
122: */
123: static String quoteIllegal(String s, String legal)
124: throws UnsupportedEncodingException {
125: StringBuffer buf = new StringBuffer();
126: for (int i = 0; i < s.length(); i++) {
127: char ch = s.charAt(i);
128: if ((ch >= 'a' && ch <= 'z')
129: || (ch >= 'A' && ch <= 'Z')
130: || (ch >= '0' && ch <= '9')
131: || legal.indexOf(ch) > -1
132: || (ch > 127 && !Character.isSpaceChar(ch) && !Character
133: .isISOControl(ch))) {
134: buf.append(ch);
135: } else {
136: byte[] bytes = new String(new char[] { ch })
137: .getBytes(encoding);
138: for (int j = 0; j < bytes.length; j++) {
139: buf.append('%');
140: buf.append(digits.charAt((bytes[j] & 0xf0) >> 4));
141: buf.append(digits.charAt(bytes[j] & 0xf));
142: }
143: }
144: }
145: return buf.toString();
146: }
147:
148: /**
149: * Other characters, which are Unicode chars that are not US-ASCII, and are
150: * not ISO Control or are not ISO Space chars are not preserved. They are
151: * converted into their hexidecimal value prepended by '%'.
152: * <p>
153: * For example: Euro currency symbol -> "%E2%82%AC".
154: * <p>
155: * Called from URI.toASCIIString()
156: * <p>
157: *
158: * @param s
159: * java.lang.String the string to be converted
160: * @return java.lang.String the converted string
161: */
162: static String encodeOthers(String s)
163: throws UnsupportedEncodingException {
164: StringBuffer buf = new StringBuffer();
165: for (int i = 0; i < s.length(); i++) {
166: char ch = s.charAt(i);
167: if (ch <= 127) {
168: buf.append(ch);
169: } else {
170: byte[] bytes = new String(new char[] { ch })
171: .getBytes(encoding);
172: for (int j = 0; j < bytes.length; j++) {
173: buf.append('%');
174: buf.append(digits.charAt((bytes[j] & 0xf0) >> 4));
175: buf.append(digits.charAt(bytes[j] & 0xf));
176: }
177: }
178: }
179: return buf.toString();
180: }
181:
182: /**
183: * Decodes the string argument which is assumed to be encoded in the
184: * <code>x-www-form-urlencoded</code> MIME content type using the UTF-8
185: * encoding scheme.
186: * <p>
187: * '%' and two following hex digit characters are converted to the
188: * equivalent byte value. All other characters are passed through
189: * unmodified.
190: *
191: * <p>
192: * e.g. "A%20B%20C %24%25" -> "A B C $%"
193: * <p>
194: * Called from URI.getXYZ() methods
195: * <p>
196: *
197: * @param s
198: * java.lang.String The encoded string.
199: * @return java.lang.String The decoded version.
200: */
201: static String decode(String s) throws UnsupportedEncodingException {
202:
203: StringBuffer result = new StringBuffer();
204: ByteArrayOutputStream out = new ByteArrayOutputStream();
205: for (int i = 0; i < s.length();) {
206: char c = s.charAt(i);
207: if (c == '%') {
208: out.reset();
209: do {
210: if (i + 2 >= s.length()) {
211: throw new IllegalArgumentException(Msg
212: .getString("K01fe", i)); //$NON-NLS-1$
213: }
214: int d1 = Character.digit(s.charAt(i + 1), 16);
215: int d2 = Character.digit(s.charAt(i + 2), 16);
216: if (d1 == -1 || d2 == -1) {
217: throw new IllegalArgumentException(Msg
218: .getString(
219: "K01ff", s.substring(i, i + 3), //$NON-NLS-1$
220: String.valueOf(i)));
221: }
222: out.write((byte) ((d1 << 4) + d2));
223: i += 3;
224: } while (i < s.length() && s.charAt(i) == '%');
225: result.append(out.toString(encoding));
226: continue;
227: }
228: result.append(c);
229: i++;
230: }
231: return result.toString();
232: }
233:
234: }
|