001: /**
002: * Copyright (c) 2000-2008 Liferay, Inc. All rights reserved.
003: *
004: * Permission is hereby granted, free of charge, to any person obtaining a copy
005: * of this software and associated documentation files (the "Software"), to deal
006: * in the Software without restriction, including without limitation the rights
007: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
008: * copies of the Software, and to permit persons to whom the Software is
009: * furnished to do so, subject to the following conditions:
010: *
011: * The above copyright notice and this permission notice shall be included in
012: * all copies or substantial portions of the Software.
013: *
014: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
017: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
019: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
020: * SOFTWARE.
021: */package com.liferay.util;
022:
023: import com.liferay.portal.kernel.util.StringMaker;
024: import com.liferay.portal.kernel.util.StringPool;
025: import com.liferay.portal.kernel.util.StringUtil;
026:
027: /**
028: * <a href="Html.java.html"><b><i>View Source</i></b></a>
029: *
030: * @author Brian Wing Shun Chan
031: * @author Clarence Shen
032: * @author Harry Mark
033: *
034: */
035: public class Html {
036:
037: public static String escape(String text) {
038: if (text == null) {
039: return null;
040: }
041:
042: // Escape using XSS recommendations from
043: // http://www.owasp.org/index.php/Cross_Site_Scripting
044: // #How_to_Protect_Yourself
045:
046: StringMaker sm = new StringMaker(text.length());
047:
048: for (int i = 0; i < text.length(); i++) {
049: char c = text.charAt(i);
050:
051: switch (c) {
052: case '<':
053: sm.append("<");
054:
055: break;
056:
057: case '>':
058: sm.append(">");
059:
060: break;
061:
062: case '&':
063: sm.append("&");
064:
065: break;
066:
067: case '"':
068: sm.append(""");
069:
070: break;
071:
072: case '\'':
073: sm.append("'");
074:
075: break;
076:
077: case '(':
078: sm.append("(");
079:
080: break;
081:
082: case ')':
083: sm.append(")");
084:
085: break;
086:
087: case '#':
088: sm.append("#");
089:
090: break;
091:
092: case '%':
093: sm.append("%");
094:
095: break;
096:
097: case ';':
098: sm.append(";");
099:
100: break;
101:
102: case '+':
103: sm.append("+");
104:
105: break;
106:
107: case '-':
108: sm.append("-");
109:
110: break;
111:
112: default:
113: sm.append(c);
114:
115: break;
116: }
117: }
118:
119: return sm.toString();
120: }
121:
122: public static String fromInputSafe(String text) {
123: return StringUtil.replace(text, "&", "&");
124: }
125:
126: public static String replaceMsWordCharacters(String text) {
127: return StringUtil
128: .replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
129: }
130:
131: public static String stripBetween(String text, String tag) {
132: return StringUtil.stripBetween(text, "<" + tag, "</" + tag
133: + ">");
134: }
135:
136: public static String stripComments(String text) {
137: return StringUtil.stripBetween(text, "<!--", "-->");
138: }
139:
140: public static String stripHtml(String text) {
141: if (text == null) {
142: return null;
143: }
144:
145: text = stripComments(text);
146:
147: StringMaker sm = new StringMaker(text.length());
148:
149: int x = 0;
150: int y = text.indexOf("<");
151:
152: while (y != -1) {
153: sm.append(text.substring(x, y));
154: sm.append(StringPool.SPACE);
155:
156: // Look for text enclosed by <script></script>
157:
158: boolean scriptFound = _isScriptTag(text, y + 1);
159:
160: if (scriptFound) {
161: int pos = y + _TAG_SCRIPT.length;
162:
163: // Find end of the tag
164:
165: pos = text.indexOf(">", pos);
166:
167: if (pos >= 0) {
168:
169: // Check if preceding character is / (i.e. is this instance
170: // of <script/>)
171:
172: if (text.charAt(pos - 1) != '/') {
173:
174: // Search for the ending </script> tag
175:
176: for (;;) {
177: pos = text.indexOf("</", pos);
178:
179: if (pos >= 0) {
180: if (_isScriptTag(text, pos + 2)) {
181: y = pos;
182:
183: break;
184: } else {
185:
186: // Skip past "</"
187:
188: pos += 2;
189: }
190: } else {
191: break;
192: }
193: }
194: }
195: }
196: }
197:
198: x = text.indexOf(">", y);
199:
200: if (x == -1) {
201: break;
202: }
203:
204: x++;
205:
206: if (x < y) {
207:
208: // <b>Hello</b
209:
210: break;
211: }
212:
213: y = text.indexOf("<", x);
214: }
215:
216: if (y == -1) {
217: sm.append(text.substring(x, text.length()));
218: }
219:
220: return sm.toString();
221: }
222:
223: public static String toInputSafe(String text) {
224: return StringUtil.replace(text, new String[] { "&", "\"" },
225: new String[] { "&", """ });
226: }
227:
228: public static String unescape(String text) {
229: if (text == null) {
230: return null;
231: }
232:
233: // Optimize this
234:
235: text = StringUtil.replace(text, "<", "<");
236: text = StringUtil.replace(text, ">", ">");
237: text = StringUtil.replace(text, "&", "&");
238: text = StringUtil.replace(text, """, "\"");
239: text = StringUtil.replace(text, "'", "'");
240: text = StringUtil.replace(text, "(", "(");
241: text = StringUtil.replace(text, ")", ")");
242: text = StringUtil.replace(text, "#", "#");
243: text = StringUtil.replace(text, "%", "%");
244: text = StringUtil.replace(text, ";", ";");
245: text = StringUtil.replace(text, "+", "+");
246: text = StringUtil.replace(text, "-", "-");
247:
248: return text;
249: }
250:
251: private static boolean _isScriptTag(String text, int start) {
252: char item;
253: int pos = start;
254:
255: if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
256: for (int i = 0; i < _TAG_SCRIPT.length; i++) {
257: item = text.charAt(pos++);
258:
259: if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
260: return false;
261: }
262: }
263:
264: item = text.charAt(pos);
265:
266: // Check that char after "script" is not a letter (i.e. another tag)
267:
268: return !Character.isLetter(item);
269: } else {
270: return false;
271: }
272: }
273:
274: private static final String[] _MS_WORD_UNICODE = new String[] {
275: "\u00ae", "\u2019", "\u201c", "\u201d" };
276:
277: private static final String[] _MS_WORD_HTML = new String[] {
278: "®", StringPool.APOSTROPHE, StringPool.QUOTE,
279: StringPool.QUOTE };
280:
281: private static final char[] _TAG_SCRIPT = { 's', 'c', 'r', 'i',
282: 'p', 't' };
283:
284: }
|