001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.commons.lang;
018:
019: import java.io.IOException;
020: import java.io.StringWriter;
021: import java.io.Writer;
022:
023: import org.apache.commons.lang.exception.NestableRuntimeException;
024:
025: /**
 * <p>Escapes and unescapes <code>String</code>s for
 * Java, JavaScript, HTML, XML, and SQL.</p>
028: *
029: * @author Apache Jakarta Turbine
030: * @author Purple Technology
031: * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
032: * @author Antony Riley
033: * @author Helge Tesgaard
 * @author <a href="mailto:sean@boohai.com">Sean Brown</a>
035: * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
036: * @author Phil Steitz
037: * @author Pete Gieser
038: * @since 2.0
039: * @version $Id: StringEscapeUtils.java 471626 2006-11-06 04:02:09Z bayard $
040: */
041: public class StringEscapeUtils {
042:
043: /**
044: * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
045: * standard programming.</p>
046: *
047: * <p>Instead, the class should be used as:
048: * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
049: *
050: * <p>This constructor is public to permit tools that require a JavaBean
051: * instance to operate.</p>
052: */
053: public StringEscapeUtils() {
054: super ();
055: }
056:
057: // Java and JavaScript
058: //--------------------------------------------------------------------------
059: /**
060: * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
061: *
062: * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
063: *
064: * <p>So a tab becomes the characters <code>'\\'</code> and
065: * <code>'t'</code>.</p>
066: *
067: * <p>The only difference between Java strings and JavaScript strings
068: * is that in JavaScript, a single quote must be escaped.</p>
069: *
070: * <p>Example:
071: * <pre>
072: * input string: He didn't say, "Stop!"
073: * output string: He didn't say, \"Stop!\"
074: * </pre>
075: * </p>
076: *
077: * @param str String to escape values in, may be null
078: * @return String with escaped values, <code>null</code> if null string input
079: */
080: public static String escapeJava(String str) {
081: return escapeJavaStyleString(str, false);
082: }
083:
084: /**
085: * <p>Escapes the characters in a <code>String</code> using Java String rules to
086: * a <code>Writer</code>.</p>
087: *
088: * <p>A <code>null</code> string input has no effect.</p>
089: *
090: * @see #escapeJava(java.lang.String)
091: * @param out Writer to write escaped string into
092: * @param str String to escape values in, may be null
093: * @throws IllegalArgumentException if the Writer is <code>null</code>
094: * @throws IOException if error occurs on underlying Writer
095: */
096: public static void escapeJava(Writer out, String str)
097: throws IOException {
098: escapeJavaStyleString(out, str, false);
099: }
100:
101: /**
102: * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
103: * <p>Escapes any values it finds into their JavaScript String form.
104: * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
105: *
106: * <p>So a tab becomes the characters <code>'\\'</code> and
107: * <code>'t'</code>.</p>
108: *
109: * <p>The only difference between Java strings and JavaScript strings
110: * is that in JavaScript, a single quote must be escaped.</p>
111: *
112: * <p>Example:
113: * <pre>
114: * input string: He didn't say, "Stop!"
115: * output string: He didn\'t say, \"Stop!\"
116: * </pre>
117: * </p>
118: *
119: * @param str String to escape values in, may be null
120: * @return String with escaped values, <code>null</code> if null string input
121: */
122: public static String escapeJavaScript(String str) {
123: return escapeJavaStyleString(str, true);
124: }
125:
126: /**
127: * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
128: * to a <code>Writer</code>.</p>
129: *
130: * <p>A <code>null</code> string input has no effect.</p>
131: *
132: * @see #escapeJavaScript(java.lang.String)
133: * @param out Writer to write escaped string into
134: * @param str String to escape values in, may be null
135: * @throws IllegalArgumentException if the Writer is <code>null</code>
136: * @throws IOException if error occurs on underlying Writer
137: **/
138: public static void escapeJavaScript(Writer out, String str)
139: throws IOException {
140: escapeJavaStyleString(out, str, true);
141: }
142:
143: /**
144: * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p>
145: *
146: * @param str String to escape values in, may be null
147: * @param escapeSingleQuotes escapes single quotes if <code>true</code>
148: * @return the escaped string
149: */
150: private static String escapeJavaStyleString(String str,
151: boolean escapeSingleQuotes) {
152: if (str == null) {
153: return null;
154: }
155: try {
156: StringWriter writer = new StringWriter(str.length() * 2);
157: escapeJavaStyleString(writer, str, escapeSingleQuotes);
158: return writer.toString();
159: } catch (IOException ioe) {
160: // this should never ever happen while writing to a StringWriter
161: ioe.printStackTrace();
162: return null;
163: }
164: }
165:
166: /**
167: * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p>
168: *
169: * @param out write to receieve the escaped string
170: * @param str String to escape values in, may be null
171: * @param escapeSingleQuote escapes single quotes if <code>true</code>
172: * @throws IOException if an IOException occurs
173: */
174: private static void escapeJavaStyleString(Writer out, String str,
175: boolean escapeSingleQuote) throws IOException {
176: if (out == null) {
177: throw new IllegalArgumentException(
178: "The Writer must not be null");
179: }
180: if (str == null) {
181: return;
182: }
183: int sz;
184: sz = str.length();
185: for (int i = 0; i < sz; i++) {
186: char ch = str.charAt(i);
187:
188: // handle unicode
189: if (ch > 0xfff) {
190: out.write("\\u" + hex(ch));
191: } else if (ch > 0xff) {
192: out.write("\\u0" + hex(ch));
193: } else if (ch > 0x7f) {
194: out.write("\\u00" + hex(ch));
195: } else if (ch < 32) {
196: switch (ch) {
197: case '\b':
198: out.write('\\');
199: out.write('b');
200: break;
201: case '\n':
202: out.write('\\');
203: out.write('n');
204: break;
205: case '\t':
206: out.write('\\');
207: out.write('t');
208: break;
209: case '\f':
210: out.write('\\');
211: out.write('f');
212: break;
213: case '\r':
214: out.write('\\');
215: out.write('r');
216: break;
217: default:
218: if (ch > 0xf) {
219: out.write("\\u00" + hex(ch));
220: } else {
221: out.write("\\u000" + hex(ch));
222: }
223: break;
224: }
225: } else {
226: switch (ch) {
227: case '\'':
228: if (escapeSingleQuote) {
229: out.write('\\');
230: }
231: out.write('\'');
232: break;
233: case '"':
234: out.write('\\');
235: out.write('"');
236: break;
237: case '\\':
238: out.write('\\');
239: out.write('\\');
240: break;
241: default:
242: out.write(ch);
243: break;
244: }
245: }
246: }
247: }
248:
249: /**
250: * <p>Returns an upper case hexadecimal <code>String</code> for the given
251: * character.</p>
252: *
253: * @param ch The character to convert.
254: * @return An upper case hexadecimal <code>String</code>
255: */
256: private static String hex(char ch) {
257: return Integer.toHexString(ch).toUpperCase();
258: }
259:
260: /**
261: * <p>Unescapes any Java literals found in the <code>String</code>.
262: * For example, it will turn a sequence of <code>'\'</code> and
263: * <code>'n'</code> into a newline character, unless the <code>'\'</code>
264: * is preceded by another <code>'\'</code>.</p>
265: *
266: * @param str the <code>String</code> to unescape, may be null
267: * @return a new unescaped <code>String</code>, <code>null</code> if null string input
268: */
269: public static String unescapeJava(String str) {
270: if (str == null) {
271: return null;
272: }
273: try {
274: StringWriter writer = new StringWriter(str.length());
275: unescapeJava(writer, str);
276: return writer.toString();
277: } catch (IOException ioe) {
278: // this should never ever happen while writing to a StringWriter
279: ioe.printStackTrace();
280: return null;
281: }
282: }
283:
284: /**
285: * <p>Unescapes any Java literals found in the <code>String</code> to a
286: * <code>Writer</code>.</p>
287: *
288: * <p>For example, it will turn a sequence of <code>'\'</code> and
289: * <code>'n'</code> into a newline character, unless the <code>'\'</code>
290: * is preceded by another <code>'\'</code>.</p>
291: *
292: * <p>A <code>null</code> string input has no effect.</p>
293: *
294: * @param out the <code>Writer</code> used to output unescaped characters
295: * @param str the <code>String</code> to unescape, may be null
296: * @throws IllegalArgumentException if the Writer is <code>null</code>
297: * @throws IOException if error occurs on underlying Writer
298: */
299: public static void unescapeJava(Writer out, String str)
300: throws IOException {
301: if (out == null) {
302: throw new IllegalArgumentException(
303: "The Writer must not be null");
304: }
305: if (str == null) {
306: return;
307: }
308: int sz = str.length();
309: StringBuffer unicode = new StringBuffer(4);
310: boolean hadSlash = false;
311: boolean inUnicode = false;
312: for (int i = 0; i < sz; i++) {
313: char ch = str.charAt(i);
314: if (inUnicode) {
315: // if in unicode, then we're reading unicode
316: // values in somehow
317: unicode.append(ch);
318: if (unicode.length() == 4) {
319: // unicode now contains the four hex digits
320: // which represents our unicode character
321: try {
322: int value = Integer.parseInt(
323: unicode.toString(), 16);
324: out.write((char) value);
325: unicode.setLength(0);
326: inUnicode = false;
327: hadSlash = false;
328: } catch (NumberFormatException nfe) {
329: throw new NestableRuntimeException(
330: "Unable to parse unicode value: "
331: + unicode, nfe);
332: }
333: }
334: continue;
335: }
336: if (hadSlash) {
337: // handle an escaped value
338: hadSlash = false;
339: switch (ch) {
340: case '\\':
341: out.write('\\');
342: break;
343: case '\'':
344: out.write('\'');
345: break;
346: case '\"':
347: out.write('"');
348: break;
349: case 'r':
350: out.write('\r');
351: break;
352: case 'f':
353: out.write('\f');
354: break;
355: case 't':
356: out.write('\t');
357: break;
358: case 'n':
359: out.write('\n');
360: break;
361: case 'b':
362: out.write('\b');
363: break;
364: case 'u': {
365: // uh-oh, we're in unicode country....
366: inUnicode = true;
367: break;
368: }
369: default:
370: out.write(ch);
371: break;
372: }
373: continue;
374: } else if (ch == '\\') {
375: hadSlash = true;
376: continue;
377: }
378: out.write(ch);
379: }
380: if (hadSlash) {
381: // then we're in the weird case of a \ at the end of the
382: // string, let's output it anyway.
383: out.write('\\');
384: }
385: }
386:
387: /**
388: * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
389: *
390: * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
391: * into a newline character, unless the <code>'\'</code> is preceded by another
392: * <code>'\'</code>.</p>
393: *
394: * @see #unescapeJava(String)
395: * @param str the <code>String</code> to unescape, may be null
396: * @return A new unescaped <code>String</code>, <code>null</code> if null string input
397: */
398: public static String unescapeJavaScript(String str) {
399: return unescapeJava(str);
400: }
401:
402: /**
403: * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
404: * <code>Writer</code>.</p>
405: *
406: * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
407: * into a newline character, unless the <code>'\'</code> is preceded by another
408: * <code>'\'</code>.</p>
409: *
410: * <p>A <code>null</code> string input has no effect.</p>
411: *
412: * @see #unescapeJava(Writer,String)
413: * @param out the <code>Writer</code> used to output unescaped characters
414: * @param str the <code>String</code> to unescape, may be null
415: * @throws IllegalArgumentException if the Writer is <code>null</code>
416: * @throws IOException if error occurs on underlying Writer
417: */
418: public static void unescapeJavaScript(Writer out, String str)
419: throws IOException {
420: unescapeJava(out, str);
421: }
422:
423: // HTML and XML
424: //--------------------------------------------------------------------------
425: /**
426: * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
427: *
428: * <p>
429: * For example:
430: * </p>
431: * <p><code>"bread" & "butter"</code></p>
432: * becomes:
433: * <p>
434: * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
435: * </p>
436: *
437: * <p>Supports all known HTML 4.0 entities, including funky accents.
438: * Note that the commonly used apostrophe escape character (&apos;)
439: * is not a legal entity and so is not supported). </p>
440: *
441: * @param str the <code>String</code> to escape, may be null
442: * @return a new escaped <code>String</code>, <code>null</code> if null string input
443: *
444: * @see #unescapeHtml(String)
445: * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
446: * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
447: * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
448: * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
449: * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
450: */
451: public static String escapeHtml(String str) {
452: if (str == null) {
453: return null;
454: }
455: try {
456: StringWriter writer = new StringWriter(
457: (int) (str.length() * 1.5));
458: escapeHtml(writer, str);
459: return writer.toString();
460: } catch (IOException e) {
461: //assert false;
462: //should be impossible
463: e.printStackTrace();
464: return null;
465: }
466: }
467:
468: /**
469: * <p>Escapes the characters in a <code>String</code> using HTML entities and writes
470: * them to a <code>Writer</code>.</p>
471: *
472: * <p>
473: * For example:
474: * </p>
475: * <code>"bread" & "butter"</code>
476: * <p>becomes:</p>
477: * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
478: *
479: * <p>Supports all known HTML 4.0 entities, including funky accents.
480: * Note that the commonly used apostrophe escape character (&apos;)
481: * is not a legal entity and so is not supported). </p>
482: *
483: * @param writer the writer receiving the escaped string, not null
484: * @param string the <code>String</code> to escape, may be null
485: * @throws IllegalArgumentException if the writer is null
486: * @throws IOException when <code>Writer</code> passed throws the exception from
487: * calls to the {@link Writer#write(int)} methods.
488: *
489: * @see #escapeHtml(String)
490: * @see #unescapeHtml(String)
491: * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
492: * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
493: * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
494: * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
495: * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
496: */
497: public static void escapeHtml(Writer writer, String string)
498: throws IOException {
499: if (writer == null) {
500: throw new IllegalArgumentException(
501: "The Writer must not be null.");
502: }
503: if (string == null) {
504: return;
505: }
506: Entities.HTML40.escape(writer, string);
507: }
508:
509: //-----------------------------------------------------------------------
510: /**
511: * <p>Unescapes a string containing entity escapes to a string
512: * containing the actual Unicode characters corresponding to the
513: * escapes. Supports HTML 4.0 entities.</p>
514: *
515: * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
516: * will become "<Français>"</p>
517: *
518: * <p>If an entity is unrecognized, it is left alone, and inserted
519: * verbatim into the result string. e.g. "&gt;&zzzz;x" will
520: * become ">&zzzz;x".</p>
521: *
522: * @param str the <code>String</code> to unescape, may be null
523: * @return a new unescaped <code>String</code>, <code>null</code> if null string input
524: * @see #escapeHtml(Writer, String)
525: */
526: public static String unescapeHtml(String str) {
527: if (str == null) {
528: return null;
529: }
530: try {
531: StringWriter writer = new StringWriter(
532: (int) (str.length() * 1.5));
533: unescapeHtml(writer, str);
534: return writer.toString();
535: } catch (IOException e) {
536: //assert false;
537: //should be impossible
538: e.printStackTrace();
539: return null;
540: }
541: }
542:
543: /**
544: * <p>Unescapes a string containing entity escapes to a string
545: * containing the actual Unicode characters corresponding to the
546: * escapes. Supports HTML 4.0 entities.</p>
547: *
548: * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
549: * will become "<Français>"</p>
550: *
551: * <p>If an entity is unrecognized, it is left alone, and inserted
552: * verbatim into the result string. e.g. "&gt;&zzzz;x" will
553: * become ">&zzzz;x".</p>
554: *
555: * @param writer the writer receiving the unescaped string, not null
556: * @param string the <code>String</code> to unescape, may be null
557: * @throws IllegalArgumentException if the writer is null
558: * @throws IOException if an IOException occurs
559: * @see #escapeHtml(String)
560: */
561: public static void unescapeHtml(Writer writer, String string)
562: throws IOException {
563: if (writer == null) {
564: throw new IllegalArgumentException(
565: "The Writer must not be null.");
566: }
567: if (string == null) {
568: return;
569: }
570: Entities.HTML40.unescape(writer, string);
571: }
572:
573: //-----------------------------------------------------------------------
574: /**
575: * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
576: *
577: * <p>For example: <tt>"bread" & "butter"</tt> =>
578: * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
579: * </p>
580: *
581: * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
582: * Does not support DTDs or external entities.</p>
583: *
584: * <p>Note that unicode characters greater than 0x7f are currently escaped to
585: * their numerical \\u equivalent. This may change in future releases. </p>
586: *
587: * @param writer the writer receiving the unescaped string, not null
588: * @param str the <code>String</code> to escape, may be null
589: * @throws IllegalArgumentException if the writer is null
590: * @throws IOException if there is a problem writing
591: * @see #unescapeXml(java.lang.String)
592: */
593: public static void escapeXml(Writer writer, String str)
594: throws IOException {
595: if (writer == null) {
596: throw new IllegalArgumentException(
597: "The Writer must not be null.");
598: }
599: if (str == null) {
600: return;
601: }
602: Entities.XML.escape(writer, str);
603: }
604:
605: /**
606: * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
607: *
608: * <p>For example: <tt>"bread" & "butter"</tt> =>
609: * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
610: * </p>
611: *
612: * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
613: * Does not support DTDs or external entities.</p>
614: *
615: * <p>Note that unicode characters greater than 0x7f are currently escaped to
616: * their numerical \\u equivalent. This may change in future releases. </p>
617: *
618: * @param str the <code>String</code> to escape, may be null
619: * @return a new escaped <code>String</code>, <code>null</code> if null string input
620: * @see #unescapeXml(java.lang.String)
621: */
622: public static String escapeXml(String str) {
623: if (str == null) {
624: return null;
625: }
626: return Entities.XML.escape(str);
627: }
628:
629: //-----------------------------------------------------------------------
630: /**
631: * <p>Unescapes a string containing XML entity escapes to a string
632: * containing the actual Unicode characters corresponding to the
633: * escapes.</p>
634: *
635: * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
636: * Does not support DTDs or external entities.</p>
637: *
638: * <p>Note that numerical \\u unicode codes are unescaped to their respective
639: * unicode characters. This may change in future releases. </p>
640: *
641: * @param writer the writer receiving the unescaped string, not null
642: * @param str the <code>String</code> to unescape, may be null
643: * @throws IllegalArgumentException if the writer is null
644: * @throws IOException if there is a problem writing
645: * @see #escapeXml(String)
646: */
647: public static void unescapeXml(Writer writer, String str)
648: throws IOException {
649: if (writer == null) {
650: throw new IllegalArgumentException(
651: "The Writer must not be null.");
652: }
653: if (str == null) {
654: return;
655: }
656: Entities.XML.unescape(writer, str);
657: }
658:
659: /**
660: * <p>Unescapes a string containing XML entity escapes to a string
661: * containing the actual Unicode characters corresponding to the
662: * escapes.</p>
663: *
664: * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
665: * Does not support DTDs or external entities.</p>
666: *
667: * <p>Note that numerical \\u unicode codes are unescaped to their respective
668: * unicode characters. This may change in future releases. </p>
669: *
670: * @param str the <code>String</code> to unescape, may be null
671: * @return a new unescaped <code>String</code>, <code>null</code> if null string input
672: * @see #escapeXml(String)
673: */
674: public static String unescapeXml(String str) {
675: if (str == null) {
676: return null;
677: }
678: return Entities.XML.unescape(str);
679: }
680:
681: //-----------------------------------------------------------------------
682: /**
683: * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
684: * an SQL query.</p>
685: *
686: * <p>For example,
687: * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
688: * StringEscapeUtils.escapeSql("McHale's Navy") +
689: * "'");</pre>
690: * </p>
691: *
692: * <p>At present, this method only turns single-quotes into doubled single-quotes
693: * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
694: * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
695: *
696: * see http://www.jguru.com/faq/view.jsp?EID=8881
697: * @param str the string to escape, may be null
698: * @return a new String, escaped for SQL, <code>null</code> if null string input
699: */
700: public static String escapeSql(String str) {
701: if (str == null) {
702: return null;
703: }
704: return StringUtils.replace(str, "'", "''");
705: }
706:
707: }
|