001: /*
002: * ====================================================================
003: * Copyright (c) 2004-2008 TMate Software Ltd. All rights reserved.
004: *
005: * This software is licensed as described in the file COPYING, which
006: * you should have received as part of this distribution. The terms
007: * are also available at http://svnkit.com/license.html
008: * If newer versions of this license are posted there, you may use a
009: * newer version instead, at your option.
010: * ====================================================================
011: */
012: package org.tmatesoft.svn.core.internal.util;
013:
014: import java.io.ByteArrayOutputStream;
015: import java.io.UnsupportedEncodingException;
016: import java.util.Map;
017: import java.util.HashMap;
018:
019: import org.tmatesoft.svn.core.SVNErrorCode;
020: import org.tmatesoft.svn.core.SVNErrorMessage;
021: import org.tmatesoft.svn.core.SVNException;
022: import org.tmatesoft.svn.core.internal.wc.SVNErrorManager;
023:
024: /**
025: * @version 1.1.1
026: * @author TMate Software Ltd.
027: */
028: public class SVNEncodingUtil {
029:
030: public static String uriEncode(String src) {
031: StringBuffer sb = null;
032: byte[] bytes;
033: try {
034: bytes = src.getBytes("UTF-8");
035: } catch (UnsupportedEncodingException e) {
036: bytes = src.getBytes();
037: }
038: for (int i = 0; i < bytes.length; i++) {
039: int index = bytes[i] & 0xFF;
040: if (uri_char_validity[index] > 0) {
041: if (sb != null) {
042: sb.append((char) bytes[i]);
043: }
044: continue;
045: }
046: if (sb == null) {
047: sb = new StringBuffer();
048: sb.append(new String(bytes, 0, i));
049: }
050: sb.append("%");
051:
052: sb.append(Character.toUpperCase(Character.forDigit(
053: (index & 0xF0) >> 4, 16)));
054: sb.append(Character.toUpperCase(Character.forDigit(
055: index & 0x0F, 16)));
056: }
057: return sb == null ? src : sb.toString();
058: }
059:
060: public static String autoURIEncode(String src) {
061: StringBuffer sb = null;
062: byte[] bytes;
063: try {
064: bytes = src.getBytes("UTF-8");
065: } catch (UnsupportedEncodingException e) {
066: bytes = src.getBytes();
067: }
068: for (int i = 0; i < bytes.length; i++) {
069: int index = bytes[i] & 0xFF;
070: if (uri_char_validity[index] > 0) {
071: if (sb != null) {
072: sb.append((char) bytes[i]);
073: }
074: continue;
075: } else if (index == '%' && i + 2 < bytes.length
076: && isHexDigit((char) bytes[i + 1])
077: && isHexDigit((char) bytes[i + 2])) {
078: if (sb != null) {
079: sb.append((char) bytes[i]);
080: }
081: // digits will be processed fine.
082: continue;
083: }
084: if (sb == null) {
085: sb = new StringBuffer();
086: sb.append(new String(bytes, 0, i));
087: }
088: sb.append("%");
089:
090: sb.append(Character.toUpperCase(Character.forDigit(
091: (index & 0xF0) >> 4, 16)));
092: sb.append(Character.toUpperCase(Character.forDigit(
093: index & 0x0F, 16)));
094: }
095: return sb == null ? src : sb.toString();
096: }
097:
098: public static void assertURISafe(String path) throws SVNException {
099: path = path == null ? "" : path;
100: byte[] bytes;
101: try {
102: bytes = path.getBytes("UTF-8");
103: } catch (UnsupportedEncodingException e) {
104: SVNErrorMessage err = SVNErrorMessage.create(
105: SVNErrorCode.BAD_URL,
106: "path ''{0}'' could not be encoded as UTF-8", path);
107: SVNErrorManager.error(err);
108: return;
109: }
110: if (bytes == null || bytes.length != path.length()) {
111: SVNErrorMessage err = SVNErrorMessage
112: .create(
113: SVNErrorCode.BAD_URL,
114: "path ''{0}'' doesn not look like URI-encoded path",
115: path);
116: SVNErrorManager.error(err);
117: }
118: for (int i = 0; i < bytes.length; i++) {
119: if (uri_char_validity[bytes[i]] <= 0 && bytes[i] != '%') {
120: SVNErrorMessage err = SVNErrorMessage
121: .create(
122: SVNErrorCode.BAD_URL,
123: "path ''{0}'' doesn not look like URI-encoded path; character ''{1}'' is URI unsafe",
124: new Object[] { path,
125: ((char) bytes[i]) + "" });
126: SVNErrorManager.error(err);
127: }
128: }
129: return;
130: }
131:
132: public static String uriDecode(String src) {
133: // this is string in ASCII-US encoding.
134: boolean query = false;
135: boolean decoded = false;
136: int length = src.length();
137: ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
138: for (int i = 0; i < length; i++) {
139: byte ch = (byte) src.charAt(i);
140: if (ch == '?') {
141: query = true;
142: } else if (ch == '+' && query) {
143: ch = ' ';
144: } else if (ch == '%' && i + 2 < length
145: && isHexDigit(src.charAt(i + 1))
146: && isHexDigit(src.charAt(i + 2))) {
147: ch = (byte) (hexValue(src.charAt(i + 1)) * 0x10 + hexValue(src
148: .charAt(i + 2)));
149: decoded = true;
150: i += 2;
151: } else {
152: // if character is not URI-safe try to encode it.
153: }
154: bos.write(ch);
155: }
156: if (!decoded) {
157: return src;
158: }
159: try {
160: return new String(bos.toByteArray(), "UTF-8");
161: } catch (UnsupportedEncodingException e) {
162: }
163: return src;
164: }
165:
166: public static String xmlEncodeCDATA(String src) {
167: StringBuffer sb = null;
168: for (int i = 0; i < src.length(); i++) {
169: char ch = src.charAt(i);
170: switch (ch) {
171: case '&':
172: if (sb == null) {
173: sb = createStringBuffer(src, i);
174: }
175: sb.append("&");
176: break;
177: case '<':
178: if (sb == null) {
179: sb = createStringBuffer(src, i);
180: }
181: sb.append("<");
182: break;
183: case '>':
184: if (sb == null) {
185: sb = createStringBuffer(src, i);
186: }
187: sb.append(">");
188: break;
189: case '\r':
190: if (sb == null) {
191: sb = createStringBuffer(src, i);
192: }
193: sb.append(" ");
194: break;
195: default:
196: if (sb != null) {
197: sb.append(ch);
198: }
199: }
200: }
201: return sb != null ? sb.toString() : src;
202: }
203:
204: public static String xmlEncodeAttr(String src) {
205: StringBuffer sb = new StringBuffer(src.length());
206: for (int i = 0; i < src.length(); i++) {
207: char ch = src.charAt(i);
208: switch (ch) {
209: case '&':
210: if (sb == null) {
211: sb = createStringBuffer(src, i);
212: }
213: sb.append("&");
214: break;
215: case '<':
216: if (sb == null) {
217: sb = createStringBuffer(src, i);
218: }
219: sb.append("<");
220: break;
221: case '>':
222: if (sb == null) {
223: sb = createStringBuffer(src, i);
224: }
225: sb.append(">");
226: break;
227: case '\'':
228: if (sb == null) {
229: sb = createStringBuffer(src, i);
230: }
231: sb.append("'");
232: break;
233: case '\"':
234: if (sb == null) {
235: sb = createStringBuffer(src, i);
236: }
237: sb.append(""");
238: break;
239: case '\r':
240: if (sb == null) {
241: sb = createStringBuffer(src, i);
242: }
243: sb.append(" ");
244: break;
245: case '\n':
246: if (sb == null) {
247: sb = createStringBuffer(src, i);
248: }
249: sb.append(" ");
250: break;
251: case '\t':
252: if (sb == null) {
253: sb = createStringBuffer(src, i);
254: }
255: sb.append("	");
256: break;
257: default:
258: if (sb != null) {
259: sb.append(ch);
260: }
261: }
262: }
263: return sb != null ? sb.toString() : src;
264: }
265:
266: public static boolean isXMLSafe(String value) {
267: for (int i = 0; i < value.length(); i++) {
268: char ch = value.charAt(i);
269: if (ch < 0x20 && ch != 0x0A && ch != 0x0D && ch != 0x09
270: && ch != 0x08) {
271: return false;
272: }
273: }
274: return true;
275: }
276:
277: private static final Map XML_UNESCAPE_MAP = new HashMap();
278:
279: static {
280: XML_UNESCAPE_MAP.put("&", "&");
281: XML_UNESCAPE_MAP.put("<", "<");
282: XML_UNESCAPE_MAP.put(">", ">");
283: XML_UNESCAPE_MAP.put(""", "\"");
284: XML_UNESCAPE_MAP.put("'", "'");
285: XML_UNESCAPE_MAP.put(" ", "\r");
286: XML_UNESCAPE_MAP.put(" ", "\n");
287: XML_UNESCAPE_MAP.put("	", "\t");
288: }
289:
290: public static String xmlDecode(String value) {
291: StringBuffer result = new StringBuffer(value.length());
292: int l = value.length();
293: for (int i = 0; i < l; i++) {
294: char ch = value.charAt(i);
295: if (ch == '&') {
296: String replacement = null;
297: for (int j = i + 1; j < i + 6 && j < l; j++) {
298: if (value.charAt(j) == ';' && j - i > 1) {
299: String escape = value.substring(i, j + 1); // full
300: replacement = (String) XML_UNESCAPE_MAP
301: .get(escape);
302: if (replacement != null) {
303: result.append(replacement);
304: i = j;
305: }
306: break;
307: }
308: }
309: if (replacement != null) {
310: continue;
311: }
312: }
313: result.append(ch);
314: }
315: return result.toString();
316: }
317:
318: public static String fuzzyEscape(String str) {
319: byte[] bytes = str.getBytes(); // native encoding
320: StringBuffer result = createStringBuffer(str, 0);
321: for (int i = 0; i < bytes.length; i++) {
322: if (bytes[i] >= 0) {
323: result.append((char) bytes[i]);
324: } else {
325: result.append("?\\");
326: result.append((256 - (-bytes[i]))); // get positive code (256 - b).
327: }
328: }
329: return result.toString();
330: }
331:
332: public static boolean isHexDigit(char ch) {
333: return Character.isDigit(ch)
334: || (Character.toUpperCase(ch) >= 'A' && Character
335: .toUpperCase(ch) <= 'F');
336: }
337:
338: public static boolean isASCIIControlChar(char ch) {
339: return (ch >= 0x00 && ch <= 0x1f) || ch == 0x7f;
340: }
341:
342: private static int hexValue(char ch) {
343: if (Character.isDigit(ch)) {
344: return ch - '0';
345: }
346: ch = Character.toUpperCase(ch);
347: return (ch - 'A') + 0x0A;
348: }
349:
350: private static StringBuffer createStringBuffer(String src,
351: int length) {
352: StringBuffer sb = new StringBuffer(src.length());
353: sb.append(src.toCharArray(), 0, length);
354: return sb;
355: }
356:
357: private static final byte[] uri_char_validity = new byte[] { 0, 0,
358: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
359: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,
360: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
361: 0, 0,
362:
363: /* 64 */
364: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365: 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
366: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
367: 0, 0, 1, 0,
368:
369: /* 128 */
370: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
371: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
372: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
373: 0, 0, 0, 0,
374:
375: /* 192 */
376: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
377: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
378: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379: 0, 0, 0, 0, };
380: }
|