001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one
003: * or more contributor license agreements. See the NOTICE file
004: * distributed with this work for additional information
005: * regarding copyright ownership. The ASF licenses this file
006: * to you under the Apache License, Version 2.0 (the
007: * "License"); you may not use this file except in compliance
008: * with the License. You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing,
013: * software distributed under the License is distributed on an
014: * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015: * KIND, either express or implied. See the License for the
016: * specific language governing permissions and limitations
017: * under the License.
018: */
019:
020: package org.apache.axis2.transport.http.util;
021:
022: import java.io.ByteArrayOutputStream;
023: import java.io.UnsupportedEncodingException;
024: import java.net.URISyntaxException;
025:
/**
 * Static helpers for validating, percent-encoding and percent-decoding URI
 * components.
 * <p>
 * All byte/character conversions use the charset named by {@link #encoding}.
 * Escapes are always written with upper-case hexadecimal digits.
 */
public class URIEncoderDecoder {

    /** Upper-case hexadecimal digits, indexed by nibble value (0-15). */
    static final String digits = "0123456789ABCDEF"; //$NON-NLS-1$

    /** Charset name used for every encode/decode operation in this class. */
    static final String encoding = "UTF8"; //$NON-NLS-1$

    /**
     * Validates a string by checking that it contains only:
     * <ol>
     * <li>ASCII letters ('a'..'z', 'A'..'Z'),</li>
     * <li>ASCII digits ('0'..'9'),</li>
     * <li>characters present in {@code legal},</li>
     * <li>well-formed percent escapes ('%' followed by two ASCII hex digits),</li>
     * <li>characters above 127 that are neither Unicode space characters nor
     * ISO control characters.</li>
     * </ol>
     *
     * @param s     the string to be validated
     * @param legal the additional characters allowed in {@code s}
     * @throws URISyntaxException if {@code s} contains an illegal character or
     *                            a truncated/invalid percent escape; the
     *                            exception index points at the offending
     *                            position
     */
    static void validate(String s, String legal)
            throws URISyntaxException {
        final int length = s.length();
        int i = 0;
        while (i < length) {
            char ch = s.charAt(i);
            if (ch == '%') {
                // An escape needs two more characters after the '%'.
                if (i + 2 >= length) {
                    throw new URISyntaxException(s,
                            "Incomplete % sequence", i); //$NON-NLS-1$
                }
                // Only ASCII hex digits are accepted; Character.digit would
                // also accept digits from other scripts.
                if (hexValue(s.charAt(i + 1)) == -1
                        || hexValue(s.charAt(i + 2)) == -1) {
                    throw new URISyntaxException(s,
                            "Invalid % sequence " //$NON-NLS-1$
                                    + s.substring(i, i + 3), i);
                }
                i += 3;
                continue;
            }
            boolean allowedOther = ch > 127
                    && !Character.isSpaceChar(ch)
                    && !Character.isISOControl(ch);
            if (!(isAsciiAlphanumeric(ch) || legal.indexOf(ch) > -1
                    || allowedOther)) {
                throw new URISyntaxException(s, "Illegal character", i); //$NON-NLS-1$
            }
            i++;
        }
    }

    /**
     * Validates a string by checking that every character is an ASCII letter,
     * an ASCII digit, or a member of {@code legal}. Unlike
     * {@link #validate(String, String)}, percent escapes and non-ASCII
     * characters get no special treatment.
     *
     * @param s     the string to be validated
     * @param legal the additional characters allowed in {@code s}
     * @throws URISyntaxException if any other character is found; the
     *                            exception index is the position of the first
     *                            such character
     */
    static void validateSimple(String s, String legal)
            throws URISyntaxException {
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            if (!(isAsciiAlphanumeric(ch) || legal.indexOf(ch) > -1)) {
                throw new URISyntaxException(s, "Illegal character", i); //$NON-NLS-1$
            }
        }
    }

    /**
     * Percent-encodes every character that is not an ASCII letter
     * ('a'..'z', 'A'..'Z'), an ASCII digit ('0'..'9') or a member of
     * {@code legal}. Each escaped character is converted to its bytes in the
     * {@link #encoding} charset and every byte is written as '%' followed by
     * two upper-case hex digits.
     * <p>
     * For example: {@code '#' -> "%23"}.
     * <p>
     * Note that non-ASCII characters are escaped as well unless they are
     * listed in {@code legal}. Characters outside the Basic Multilingual
     * Plane (surrogate pairs) are encoded as one code point, not as two
     * separate surrogates.
     *
     * @param s     the string to be converted
     * @param legal the characters allowed to be preserved in {@code s}
     * @return the converted string
     * @throws UnsupportedEncodingException if the {@link #encoding} charset is
     *                                      not available
     */
    public static String quoteIllegal(String s, String legal)
            throws UnsupportedEncodingException {
        StringBuilder buf = new StringBuilder();
        int i = 0;
        while (i < s.length()) {
            char ch = s.charAt(i);
            if (isAsciiAlphanumeric(ch) || legal.indexOf(ch) > -1) {
                buf.append(ch);
                i++;
            } else {
                i = appendEscapedCodePoint(buf, s, i);
            }
        }
        return buf.toString();
    }

    /**
     * Percent-encodes every character above 127; characters in the US-ASCII
     * range are copied through unchanged. Each escaped character is converted
     * to its bytes in the {@link #encoding} charset and every byte is written
     * as '%' followed by two upper-case hex digits.
     * <p>
     * For example: Euro currency symbol {@code -> "%E2%82%AC"}.
     * <p>
     * Characters outside the Basic Multilingual Plane (surrogate pairs) are
     * encoded as one code point, not as two separate surrogates.
     *
     * @param s the string to be converted
     * @return the converted string
     * @throws UnsupportedEncodingException if the {@link #encoding} charset is
     *                                      not available
     */
    static String encodeOthers(String s)
            throws UnsupportedEncodingException {
        StringBuilder buf = new StringBuilder();
        int i = 0;
        while (i < s.length()) {
            char ch = s.charAt(i);
            if (ch <= 127) {
                buf.append(ch);
                i++;
            } else {
                i = appendEscapedCodePoint(buf, s, i);
            }
        }
        return buf.toString();
    }

    /**
     * Decodes percent escapes in the given string using the
     * {@link #encoding} charset.
     * <p>
     * '%' followed by two ASCII hex digits is converted to the equivalent
     * byte value; runs of consecutive escapes are collected and decoded
     * together so that multi-byte characters are reassembled correctly. All
     * other characters are passed through unmodified. In particular '+' is
     * NOT converted to a space.
     * <p>
     * e.g. {@code "A%20B%20C %24%25" -> "A B C $%"}
     *
     * @param s the encoded string
     * @return the decoded version
     * @throws IllegalArgumentException     if a '%' is not followed by two
     *                                      ASCII hex digits
     * @throws UnsupportedEncodingException if the {@link #encoding} charset is
     *                                      not available
     */
    public static String decode(String s)
            throws UnsupportedEncodingException {
        final int length = s.length();
        StringBuilder result = new StringBuilder(length);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int i = 0;
        while (i < length) {
            char c = s.charAt(i);
            if (c != '%') {
                result.append(c);
                i++;
                continue;
            }
            // Gather the whole run of adjacent escapes before decoding so a
            // multi-byte sequence is not split across charset conversions.
            out.reset();
            do {
                if (i + 2 >= length) {
                    throw new IllegalArgumentException(
                            "Incomplete % sequence at " + i);
                }
                int d1 = hexValue(s.charAt(i + 1));
                int d2 = hexValue(s.charAt(i + 2));
                if (d1 == -1 || d2 == -1) {
                    throw new IllegalArgumentException(
                            "Invalid % sequence "
                                    + s.substring(i, i + 3) + " at " + i);
                }
                out.write((d1 << 4) + d2);
                i += 3;
            } while (i < length && s.charAt(i) == '%');
            result.append(out.toString(encoding));
        }
        return result.toString();
    }

    /** Returns true if {@code ch} is one of 'a'..'z', 'A'..'Z' or '0'..'9'. */
    private static boolean isAsciiAlphanumeric(char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
                || (ch >= '0' && ch <= '9');
    }

    /**
     * Returns the value (0-15) of an ASCII hexadecimal digit, or -1 if
     * {@code c} is not one of '0'..'9', 'a'..'f', 'A'..'F'.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Appends the percent-encoded form of the code point starting at
     * {@code index} in {@code s} to {@code buf} and returns the index of the
     * first character after that code point.
     */
    private static int appendEscapedCodePoint(StringBuilder buf, String s,
            int index) throws UnsupportedEncodingException {
        // A valid surrogate pair spans two chars; everything else (including
        // an unpaired surrogate) spans one.
        int end = index + Character.charCount(s.codePointAt(index));
        byte[] bytes = s.substring(index, end).getBytes(encoding);
        for (int j = 0; j < bytes.length; j++) {
            buf.append('%');
            buf.append(digits.charAt((bytes[j] & 0xf0) >> 4));
            buf.append(digits.charAt(bytes[j] & 0xf));
        }
        return end;
    }

}
|