001: /*
002: * ====================================================================
003: * Copyright (c) 1995-1999 Purple Technology, Inc. All rights
004: * reserved.
005: *
006: * PLAIN LANGUAGE LICENSE: Do whatever you like with this code, free
007: * of charge, just give credit where credit is due. If you improve it,
008: * please send your improvements to alex@purpletech.com. Check
009: * http://www.purpletech.com/code/ for the latest version and news.
010: *
011: * LEGAL LANGUAGE LICENSE: Redistribution and use in source and binary
012: * forms, with or without modification, are permitted provided that
013: * the following conditions are met:
014: *
015: * 1. Redistributions of source code must retain the above copyright
016: * notice, this list of conditions and the following disclaimer.
017: *
018: * 2. Redistributions in binary form must reproduce the above
019: * copyright notice, this list of conditions and the following
020: * disclaimer in the documentation and/or other materials provided
021: * with the distribution.
022: *
023: * 3. The names of the authors and the names "Purple Technology,"
024: * "Purple Server" and "Purple Chat" must not be used to endorse or
025: * promote products derived from this software without prior written
026: * permission. For written permission, please contact
027: * server@purpletech.com.
028: *
029: * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND PURPLE TECHNOLOGY ``AS
030: * IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
031: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
032: * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
033: * AUTHORS OR PURPLE TECHNOLOGY BE LIABLE FOR ANY DIRECT, INDIRECT,
034: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
035: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
036: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
037: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
038: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
039: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
040: * OF THE POSSIBILITY OF SUCH DAMAGE.
041: *
042: * ====================================================================
043: *
044: **/
045: package org.ejbca.util;
046:
047: import java.util.HashMap;
048: import java.util.Map;
049:
/**
 * Static helpers for converting between raw text and HTML entity escapes,
 * plus a minimal escaper for text embedded in single-quoted JavaScript strings.
 */
public class HTMLTools {
    // see http://hotwired.lycos.com/webmonkey/reference/special_characters/
    /**
     * Table of supported named entities: each row is {entity name (String),
     * character code (Integer)}. The apostrophe is intentionally absent, so
     * {@link #htmlescape(String)} leaves it untouched.
     */
    static Object[][] entities = {
        // {"#39", Integer.valueOf(39)}, // ' - apostrophe
        { "quot", Integer.valueOf(34) }, // " - double-quote
        { "amp", Integer.valueOf(38) }, // & - ampersand
        { "lt", Integer.valueOf(60) }, // < - less-than
        { "gt", Integer.valueOf(62) }, // > - greater-than
        { "nbsp", Integer.valueOf(160) }, // non-breaking space
        { "copy", Integer.valueOf(169) }, // copyright
        { "reg", Integer.valueOf(174) }, // registered trademark
        { "Agrave", Integer.valueOf(192) }, // uppercase A, grave accent
        { "Aacute", Integer.valueOf(193) }, // uppercase A, acute accent
        { "Acirc", Integer.valueOf(194) }, // uppercase A, circumflex accent
        { "Atilde", Integer.valueOf(195) }, // uppercase A, tilde
        { "Auml", Integer.valueOf(196) }, // uppercase A, umlaut
        { "Aring", Integer.valueOf(197) }, // uppercase A, ring
        { "AElig", Integer.valueOf(198) }, // uppercase AE
        { "Ccedil", Integer.valueOf(199) }, // uppercase C, cedilla
        { "Egrave", Integer.valueOf(200) }, // uppercase E, grave accent
        { "Eacute", Integer.valueOf(201) }, // uppercase E, acute accent
        { "Ecirc", Integer.valueOf(202) }, // uppercase E, circumflex accent
        { "Euml", Integer.valueOf(203) }, // uppercase E, umlaut
        { "Igrave", Integer.valueOf(204) }, // uppercase I, grave accent
        { "Iacute", Integer.valueOf(205) }, // uppercase I, acute accent
        { "Icirc", Integer.valueOf(206) }, // uppercase I, circumflex accent
        { "Iuml", Integer.valueOf(207) }, // uppercase I, umlaut
        { "ETH", Integer.valueOf(208) }, // uppercase Eth, Icelandic
        { "Ntilde", Integer.valueOf(209) }, // uppercase N, tilde
        { "Ograve", Integer.valueOf(210) }, // uppercase O, grave accent
        { "Oacute", Integer.valueOf(211) }, // uppercase O, acute accent
        { "Ocirc", Integer.valueOf(212) }, // uppercase O, circumflex accent
        { "Otilde", Integer.valueOf(213) }, // uppercase O, tilde
        { "Ouml", Integer.valueOf(214) }, // uppercase O, umlaut
        { "Oslash", Integer.valueOf(216) }, // uppercase O, slash
        { "Ugrave", Integer.valueOf(217) }, // uppercase U, grave accent
        { "Uacute", Integer.valueOf(218) }, // uppercase U, acute accent
        { "Ucirc", Integer.valueOf(219) }, // uppercase U, circumflex accent
        { "Uuml", Integer.valueOf(220) }, // uppercase U, umlaut
        { "Yacute", Integer.valueOf(221) }, // uppercase Y, acute accent
        { "THORN", Integer.valueOf(222) }, // uppercase THORN, Icelandic
        { "szlig", Integer.valueOf(223) }, // lowercase sharp s, German
        { "agrave", Integer.valueOf(224) }, // lowercase a, grave accent
        { "aacute", Integer.valueOf(225) }, // lowercase a, acute accent
        { "acirc", Integer.valueOf(226) }, // lowercase a, circumflex accent
        { "atilde", Integer.valueOf(227) }, // lowercase a, tilde
        { "auml", Integer.valueOf(228) }, // lowercase a, umlaut
        { "aring", Integer.valueOf(229) }, // lowercase a, ring
        { "aelig", Integer.valueOf(230) }, // lowercase ae
        { "ccedil", Integer.valueOf(231) }, // lowercase c, cedilla
        { "egrave", Integer.valueOf(232) }, // lowercase e, grave accent
        { "eacute", Integer.valueOf(233) }, // lowercase e, acute accent
        { "ecirc", Integer.valueOf(234) }, // lowercase e, circumflex accent
        { "euml", Integer.valueOf(235) }, // lowercase e, umlaut
        { "igrave", Integer.valueOf(236) }, // lowercase i, grave accent
        { "iacute", Integer.valueOf(237) }, // lowercase i, acute accent
        { "icirc", Integer.valueOf(238) }, // lowercase i, circumflex accent
        { "iuml", Integer.valueOf(239) }, // lowercase i, umlaut
        { "eth", Integer.valueOf(240) }, // lowercase eth, Icelandic
        { "ntilde", Integer.valueOf(241) }, // lowercase n, tilde
        { "ograve", Integer.valueOf(242) }, // lowercase o, grave accent
        { "oacute", Integer.valueOf(243) }, // lowercase o, acute accent
        { "ocirc", Integer.valueOf(244) }, // lowercase o, circumflex accent
        { "otilde", Integer.valueOf(245) }, // lowercase o, tilde
        { "ouml", Integer.valueOf(246) }, // lowercase o, umlaut
        { "oslash", Integer.valueOf(248) }, // lowercase o, slash
        { "ugrave", Integer.valueOf(249) }, // lowercase u, grave accent
        { "uacute", Integer.valueOf(250) }, // lowercase u, acute accent
        { "ucirc", Integer.valueOf(251) }, // lowercase u, circumflex accent
        { "uuml", Integer.valueOf(252) }, // lowercase u, umlaut
        { "yacute", Integer.valueOf(253) }, // lowercase y, acute accent
        { "thorn", Integer.valueOf(254) }, // lowercase thorn, Icelandic
        { "yuml", Integer.valueOf(255) }, // lowercase y, umlaut
        { "euro", Integer.valueOf(8364) }, // Euro symbol
    };

    /** Entity name to character code, built from {@link #entities}. */
    static Map<String, Integer> e2i = new HashMap<String, Integer>();
    /** Character code to entity name, built from {@link #entities}. */
    static Map<Integer, String> i2e = new HashMap<Integer, String>();
    static {
        for (int i = 0; i < entities.length; ++i) {
            String name = (String) entities[i][0];
            Integer code = (Integer) entities[i][1];
            e2i.put(name, code);
            i2e.put(code, name);
        }
    }

    /**
     * Turns special characters into their HTML entity equivalents, e.g.
     * {@code "bread" & "butter"} becomes
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * <p>
     * Characters listed in the entity table are written as named entities;
     * any other non-ASCII character is written as a decimal numeric reference.
     * A surrogate pair is emitted as one reference to its code point. The
     * apostrophe is not escaped because it has no entry in the table.
     *
     * @param s1 the text to escape, may be {@code null}
     * @return the escaped text, or {@code null} if {@code s1} is {@code null}
     * @see #htmlunescape(String)
     **/
    public static String htmlescape(String s1) {
        if (s1 == null) {
            return null;
        }
        int length = s1.length();
        StringBuilder buf = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            // Work on code points so a surrogate pair yields a single reference.
            int codePoint = s1.codePointAt(i);
            i += Character.charCount(codePoint);
            String entity = i2e.get(Integer.valueOf(codePoint));
            if (entity != null) {
                buf.append('&').append(entity).append(';');
            } else if (codePoint > 127) {
                // Everything outside 7-bit ASCII becomes a numeric reference.
                buf.append("&#").append(codePoint).append(';');
            } else {
                buf.append((char) codePoint);
            }
        }
        return buf.toString();
    }

    /**
     * Given a string containing entity escapes, returns a string containing
     * the actual Unicode characters corresponding to the escapes.
     * <p>
     * Recognized forms are the named entities of the entity table, decimal
     * references ({@code &#65;}) and hexadecimal references ({@code &#x41;}).
     * Anything else that starts with {@code &} (unknown name, malformed or
     * out-of-range number, missing semicolon) is copied to the output
     * unchanged, and scanning resumes right after that ampersand so that a
     * following valid entity is still decoded.
     *
     * Note: nasty bug fixed by Helge Tesgaard (and, in parallel, by
     * Alex, but Helge deserves major props for emailing me the fix).
     * 15-Feb-2002 Another bug fixed by Sean Brown <sean@boohai.com>
     *
     * @param s1 the text to unescape, may be {@code null}
     * @return the unescaped text, or {@code null} if {@code s1} is {@code null}
     * @see #htmlescape(String)
     **/
    public static String htmlunescape(String s1) {
        if (s1 == null) {
            return null;
        }
        int length = s1.length();
        StringBuilder buf = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char ch = s1.charAt(i);
            if (ch != '&') {
                buf.append(ch);
                i++;
                continue;
            }
            // An entity body cannot contain '&', so stop at whichever of ';' or
            // '&' comes first; this keeps the whole scan linear.
            int end = i + 1;
            while (end < length && s1.charAt(end) != ';' && s1.charAt(end) != '&') {
                end++;
            }
            int codePoint = -1;
            if (end < length && s1.charAt(end) == ';') {
                codePoint = decodeEntity(s1.substring(i + 1, end));
            }
            if (codePoint < 0) {
                // Not an entity: keep the ampersand literally and rescan what follows.
                buf.append(ch);
                i++;
            } else {
                buf.appendCodePoint(codePoint);
                i = end + 1;
            }
        }
        return buf.toString();
    }

    /**
     * Resolves the text between {@code &} and {@code ;} to a code point.
     *
     * @param entity entity body without the surrounding {@code &} and {@code ;}
     * @return the code point, or -1 if the body is empty, unknown, not a
     *         parsable number, or outside the valid code point range
     */
    private static int decodeEntity(String entity) {
        if (entity.length() == 0) {
            return -1;
        }
        if (entity.charAt(0) != '#') {
            Integer code = e2i.get(entity);
            return code == null ? -1 : code.intValue();
        }
        int radix = 10;
        int digitsStart = 1;
        if (entity.length() > 1 && (entity.charAt(1) == 'x' || entity.charAt(1) == 'X')) {
            radix = 16;
            digitsStart = 2;
        }
        try {
            int codePoint = Integer.parseInt(entity.substring(digitsStart), radix);
            return Character.isValidCodePoint(codePoint) ? codePoint : -1;
        } catch (NumberFormatException e) {
            // Malformed or overflowing number: caller leaves the text as-is.
            return -1;
        }
    }

    /**
     * Escapes apostrophes with a backslash so the text can be placed inside a
     * single-quoted JavaScript string literal.
     * <p>
     * NOTE(review): only the apostrophe is handled; backslashes, double quotes
     * and line breaks in the input are passed through unchanged.
     *
     * @param str the text to escape, may be {@code null}
     * @return the escaped text, or {@code null} if {@code str} is {@code null}
     */
    public static String javascriptEscape(String str) {
        if (str == null) {
            return null;
        }
        // In JavaScript an unescaped apostrophe terminates the string and breaks the script.
        return str.replace("'", "\\'");
    }
}
|