001: /*
002: * Copyright 2002-2007 the original author or authors.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.springframework.web.util;
018:
019: /**
020: * Utility class for HTML escaping. Escapes and unescapes
021: * based on the W3C HTML 4.01 recommendation, handling
022: * character entity references.
023: *
024: * <p>Reference:
025: * <a href="http://www.w3.org/TR/html4/charset.html">http://www.w3.org/TR/html4/charset.html</a>
026: *
027: * <p>For a comprehensive set of String escaping utilities,
028: * consider Jakarta Commons Lang and its StringEscapeUtils class.
029: * We are not using that class here to avoid a runtime dependency
030: * on Commons Lang just for HTML escaping. Furthermore, Spring's
031: * HTML escaping is more flexible and 100% HTML 4.0 compliant.
032: *
033: * @author Juergen Hoeller
034: * @author Martin Kersten
035: * @since 01.03.2003
036: * @see org.apache.commons.lang.StringEscapeUtils
037: */
038: public abstract class HtmlUtils {
039:
040: /**
041: * Shared instance of pre-parsed HTML character entity references.
042: */
043: private static final HtmlCharacterEntityReferences characterEntityReferences = new HtmlCharacterEntityReferences();
044:
045: /**
046: * Turn special characters into HTML character references.
047: * Handles complete character set defined in HTML 4.01 recommendation.
048: * <p>Escapes all special characters to their corresponding
049: * entity reference (e.g. <code><</code>).
050: * <p>Reference:
051: * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
052: * http://www.w3.org/TR/html4/sgml/entities.html
053: * </a>
054: * @param input the (unescaped) input string
055: * @return the escaped string
056: */
057: public static String htmlEscape(String input) {
058: if (input == null) {
059: return null;
060: }
061: StringBuffer escaped = new StringBuffer(input.length() * 2);
062: for (int i = 0; i < input.length(); i++) {
063: char character = input.charAt(i);
064: String reference = characterEntityReferences
065: .convertToReference(character);
066: if (reference != null) {
067: escaped.append(reference);
068: } else {
069: escaped.append(character);
070: }
071: }
072: return escaped.toString();
073: }
074:
075: /**
076: * Turn special characters into HTML character references.
077: * Handles complete character set defined in HTML 4.01 recommendation.
078: * <p>Escapes all special characters to their corresponding numeric
079: * reference in decimal format (&#<i>Decimal</i>;).
080: * <p>Reference:
081: * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
082: * http://www.w3.org/TR/html4/sgml/entities.html
083: * </a>
084: * @param input the (unescaped) input string
085: * @return the escaped string
086: */
087: public static String htmlEscapeDecimal(String input) {
088: if (input == null) {
089: return null;
090: }
091: StringBuffer escaped = new StringBuffer(input.length() * 2);
092: for (int i = 0; i < input.length(); i++) {
093: char character = input.charAt(i);
094: if (characterEntityReferences
095: .isMappedToReference(character)) {
096: escaped
097: .append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START);
098: escaped.append((int) character);
099: escaped
100: .append(HtmlCharacterEntityReferences.REFERENCE_END);
101: } else {
102: escaped.append(character);
103: }
104: }
105: return escaped.toString();
106: }
107:
108: /**
109: * Turn special characters into HTML character references.
110: * Handles complete character set defined in HTML 4.01 recommendation.
111: * <p>Escapes all special characters to their corresponding numeric
112: * reference in hex format (&#x<i>Hex</i>;).
113: * <p>Reference:
114: * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
115: * http://www.w3.org/TR/html4/sgml/entities.html
116: * </a>
117: * @param input the (unescaped) input string
118: * @return the escaped string
119: */
120: public static String htmlEscapeHex(String input) {
121: if (input == null) {
122: return null;
123: }
124: StringBuffer escaped = new StringBuffer(input.length() * 2);
125: for (int i = 0; i < input.length(); i++) {
126: char character = input.charAt(i);
127: if (characterEntityReferences
128: .isMappedToReference(character)) {
129: escaped
130: .append(HtmlCharacterEntityReferences.HEX_REFERENCE_START);
131: escaped.append(Integer.toString((int) character, 16));
132: escaped
133: .append(HtmlCharacterEntityReferences.REFERENCE_END);
134: } else {
135: escaped.append(character);
136: }
137: }
138: return escaped.toString();
139: }
140:
141: /**
142: * Turn HTML character references into their plain text UNICODE equivalent.
143: * <p>Handles complete character set defined in HTML 4.01 recommendation
144: * and all reference types (decimal, hex, and entity).
145: * <p>Correctly converts the following formats:
146: * <blockquote>
147: * &#<i>Entity</i>; - <i>(Example: &amp;) case sensitive</i>
148: * &#<i>Decimal</i>; - <i>(Example: &#68;)</i><br>
149: * &#x<i>Hex</i>; - <i>(Example: &#xE5;) case insensitive</i><br>
150: * </blockquote>
151: * Gracefully handles malformed character references by copying original
152: * characters as is when encountered.<p>
153: * <p>Reference:
154: * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
155: * http://www.w3.org/TR/html4/sgml/entities.html
156: * </a>
157: * @param input the (escaped) input string
158: * @return the unescaped string
159: */
160: public static String htmlUnescape(String input) {
161: if (input == null) {
162: return null;
163: }
164: return new HtmlCharacterEntityDecoder(
165: characterEntityReferences, input).decode();
166: }
167:
168: }
|