001: /*
002: * Copyright 2005 Joe Walker
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.directwebremoting.util;
017:
018: import java.util.Arrays;
019: import java.util.Locale;
020: import java.util.SortedSet;
021: import java.util.TreeSet;
022:
023: /**
024: * Various Javascript code utilities.
025: * The escape classes were taken from jakarta-commons-lang which in turn
026: * borrowed from Turbine and other projects. The list of authors below is almost
027: * certainly far too long, but I'm not sure who really wrote these methods.
028: * @author Joe Walker [joe at getahead dot ltd dot uk]
029: * @author Henri Yandell
030: * @author Alexander Day Chaffee
031: * @author Antony Riley
032: * @author Helge Tesgaard
033: * @author Sean Brown
034: * @author Gary Gregory
035: * @author Phil Steitz
036: * @author Pete Gieser
037: */
public class JavascriptUtil {
    /**
     * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
     * <p>Quotes (single and double) and the backslash are prefixed with a
     * backslash; backspace, newline, tab, form feed and carriage return
     * become their two-character short forms; every other character below
     * 32 or above 0x7f is written as a backslash-u escape followed by four
     * upper case hex digits. All remaining characters are copied unchanged.</p>
     *
     * <p>So a tab becomes the characters <code>'\\'</code> and
     * <code>'t'</code>.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote must be escaped.</p>
     *
     * <p>Example:
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     * </p>
     *
     * <p>NOTE(review): the forward slash is copied through unescaped, so a
     * closing script tag in the input appears verbatim in the output.
     * Confirm whether any caller embeds the result directly inside an HTML
     * script element.</p>
     *
     * @param str String to escape values in, may be null
     * @return String with escaped values, <code>null</code> if null string input
     */
    public static String escapeJavaScript(String str) {
        if (str == null) {
            return null;
        }

        int sz = str.length();
        StringBuilder writer = new StringBuilder(sz * 2);

        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);

            switch (ch) {
            case '\b':
                writer.append("\\b");
                break;
            case '\n':
                writer.append("\\n");
                break;
            case '\t':
                writer.append("\\t");
                break;
            case '\f':
                writer.append("\\f");
                break;
            case '\r':
                writer.append("\\r");
                break;
            case '\'':
                // If we wanted to escape for Java strings then we would
                // not need this case.
                writer.append("\\'");
                break;
            case '"':
                writer.append("\\\"");
                break;
            case '\\':
                writer.append("\\\\");
                break;
            default:
                if (ch < 32 || ch > 0x7f) {
                    // Remaining control characters and everything outside
                    // 7-bit ASCII are written as four-digit hex escapes.
                    appendUnicodeEscape(writer, ch);
                } else {
                    writer.append(ch);
                }
                break;
            }
        }

        return writer.toString();
    }

    /**
     * Appends the backslash-u escape for a character: a backslash, the
     * letter <code>u</code>, then the character's upper case hex value
     * left-padded with zeros to exactly four digits.
     * @param out The buffer to append to
     * @param ch The character to write in escaped form
     */
    private static void appendUnicodeEscape(StringBuilder out, char ch) {
        String digits = hex(ch);
        out.append("\\u");
        for (int pad = digits.length(); pad < 4; pad++) {
            out.append('0');
        }
        out.append(digits);
    }

    /**
     * <p>Returns an upper case hexadecimal <code>String</code> for the given
     * character.</p>
     * @param ch The character to convert.
     * @return An upper case hexadecimal <code>String</code>
     */
    private static String hex(char ch) {
        return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
    }

    /**
     * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
     * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
     * into a newline character, unless the <code>'\'</code> is preceded by another
     * <code>'\'</code>.</p>
     * <p>A backslash followed by <code>'u'</code> consumes the next four
     * characters as hex digits and yields the character with that value.
     * A backslash followed by any character without a special meaning yields
     * that character on its own.</p>
     * <p>Malformed input at the very end of the string is passed through
     * rather than dropped: a lone trailing backslash is output as-is, and a
     * backslash-u escape with fewer than four characters after it is output
     * exactly as it was written.</p>
     * @param str the <code>String</code> to unescape, may be null
     * @return A new unescaped <code>String</code>, <code>null</code> if null string input
     * @throws IllegalArgumentException if the four characters following a
     * backslash-u are not all hexadecimal digits (0-9, a-f, A-F)
     */
    public static String unescapeJavaScript(String str) {
        if (str == null) {
            return null;
        }

        int sz = str.length();
        StringBuilder writer = new StringBuilder(sz);
        StringBuilder unicode = new StringBuilder(4);
        boolean hadSlash = false;
        boolean inUnicode = false;

        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);

            if (inUnicode) {
                // Collect the four characters that follow a backslash-u.
                unicode.append(ch);
                if (unicode.length() == 4) {
                    writer.append(parseUnicodeEscape(unicode));
                    unicode.setLength(0);
                    inUnicode = false;
                }
                continue;
            }

            if (hadSlash) {
                // handle an escaped value
                hadSlash = false;
                switch (ch) {
                case 'r':
                    writer.append('\r');
                    break;
                case 'f':
                    writer.append('\f');
                    break;
                case 't':
                    writer.append('\t');
                    break;
                case 'n':
                    writer.append('\n');
                    break;
                case 'b':
                    writer.append('\b');
                    break;
                case 'u':
                    // The next four characters are the hex digits.
                    inUnicode = true;
                    break;
                default:
                    // Covers backslash, both quote characters and any
                    // character with no special meaning: emit it unchanged.
                    writer.append(ch);
                    break;
                }
                continue;
            }

            if (ch == '\\') {
                hadSlash = true;
                continue;
            }

            writer.append(ch);
        }

        if (inUnicode) {
            // The input ended part way through a unicode escape. Emit what
            // was read verbatim instead of silently discarding it.
            writer.append("\\u").append(unicode);
        } else if (hadSlash) {
            // then we're in the weird case of a \ at the end of the
            // string, let's output it anyway.
            writer.append('\\');
        }

        return writer.toString();
    }

    /**
     * Converts the hex digits of a unicode escape into the character they
     * denote. Only the ASCII digits 0-9, a-f and A-F are accepted; sign
     * characters and any other characters are rejected because they cannot
     * appear in a JavaScript unicode escape.
     * @param digits The characters that followed the backslash-u
     * @return The character with the given hexadecimal value
     * @throws IllegalArgumentException if any character is not a hex digit
     */
    private static char parseUnicodeEscape(CharSequence digits) {
        int value = 0;
        for (int i = 0; i < digits.length(); i++) {
            char c = digits.charAt(i);
            int nibble;
            if (c >= '0' && c <= '9') {
                nibble = c - '0';
            } else if (c >= 'a' && c <= 'f') {
                nibble = c - 'a' + 10;
            } else if (c >= 'A' && c <= 'F') {
                nibble = c - 'A' + 10;
            } else {
                throw new IllegalArgumentException(
                    "Unable to parse unicode value: " + digits);
            }
            value = (value << 4) | nibble;
        }
        return (char) value;
    }

    /**
     * Check to see if the given word is reserved or a bad idea in any known
     * version of JavaScript. The comparison is case sensitive.
     * @param name The word to check, may be null
     * @return true if the word is in the reserved list, false if it is not
     * reserved or is <code>null</code>
     */
    public static boolean isReservedWord(String name) {
        // TreeSet.contains(null) throws with natural ordering, so guard first.
        return name != null && RESERVED_WORDS.contains(name);
    }

    /**
     * The array of javascript reserved words
     */
    private static final String[] RESERVED_ARRAY = new String[] {
        // Reserved and used at ECMAScript 4
        "as", "break", "case", "catch", "class", "const",
        "continue", "default", "delete", "do", "else", "export",
        "extends", "false", "finally", "for", "function", "if",
        "import", "in", "instanceof", "is", "namespace", "new",
        "null", "package", "private", "public", "return", "super",
        "switch", "this", "throw", "true", "try", "typeof", "use",
        "var", "void",
        "while",
        "with",
        // Reserved for future use at ECMAScript 4
        "abstract", "debugger", "enum", "goto", "implements",
        "interface", "native", "protected", "synchronized",
        "throws", "transient", "volatile",
        // Reserved in ECMAScript 3, unreserved at 4 best to avoid anyway
        "boolean", "byte", "char", "double", "final", "float",
        "int", "long", "short", "static",

        // I have seen the following list as 'best avoided for function names'
        // but it seems way too all-encompassing, so I'm not going to include it
        /*
        "alert", "anchor", "area", "arguments", "array", "assign", "blur",
        "boolean", "button", "callee", "caller", "captureevents", "checkbox",
        "clearinterval", "cleartimeout", "close", "closed", "confirm",
        "constructor", "date", "defaultstatus", "document", "element", "escape",
        "eval", "fileupload", "find", "focus", "form", "frame", "frames",
        "getclass", "hidden", "history", "home", "image", "infinity",
        "innerheight", "isfinite", "innerwidth", "isnan", "java", "javaarray",
        "javaclass", "javaobject", "javapackage", "length", "link", "location",
        "locationbar", "math", "menubar", "mimetype", "moveby", "moveto",
        "name", "nan", "navigate", "navigator", "netscape", "number", "object",
        "onblur", "onerror", "onfocus", "onload", "onunload", "open", "opener",
        "option", "outerheight", "outerwidth", "packages", "pagexoffset",
        "pageyoffset", "parent", "parsefloat", "parseint", "password",
        "personalbar", "plugin", "print", "prompt", "prototype", "radio", "ref",
        "regexp", "releaseevents", "reset", "resizeby", "resizeto",
        "routeevent", "scroll", "scrollbars", "scrollby", "scrollto", "select",
        "self", "setinterval", "settimeout", "status", "statusbar", "stop",
        "string", "submit", "sun", "taint", "text", "textarea", "toolbar",
        "top", "tostring", "unescape", "untaint", "unwatch", "valueof", "watch",
        "window",
        */
    };

    /**
     * The set of reserved words, built once from {@link #RESERVED_ARRAY} so
     * that we don't generate illegal javascript. It is never modified after
     * class initialization.
     */
    private static final SortedSet<String> RESERVED_WORDS =
        new TreeSet<String>(Arrays.asList(RESERVED_ARRAY));
}
|