001: package org.methodize.nntprss.util;
002:
003: import java.util.HashMap;
004: import java.util.Map;
005:
006: /* -----------------------------------------------------------
007: * nntp//rss - a bridge between the RSS world and NNTP clients
008: * Copyright (c) 2002, 2003 Jason Brome. All Rights Reserved.
009: *
010: * email: nntprss@methodize.org
011: * mail: Methodize Solutions
012: * PO Box 3865
013: * Grand Central Station
014: * New York NY 10163
015: *
016: * This file is part of nntp//rss
017: *
018: * Entities list from:
019: * http://www.w3.org/TR/html401/sgml/entities.html
020: *
021: * Portions © International Organization for Standardization 1986:
022: * Permission to copy in any form is granted for use with
023: * conforming SGML systems and applications as defined in
024: * ISO 8879, provided this notice is included in all copies.
025: *
026: * nntp//rss is free software; you can redistribute it
027: * and/or modify it under the terms of the GNU General
028: * Public License as published by the Free Software Foundation;
029: * either version 2 of the License, or (at your option) any
030: * later version.
031: *
032: * This program is distributed in the hope that it will be
033: * useful, but WITHOUT ANY WARRANTY; without even the implied
034: * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
035: * PURPOSE. See the GNU General Public License for more
036: * details.
037: *
038: * You should have received a copy of the GNU General Public
039: * License along with this program; if not, write to the
040: * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
041: * Boston, MA 02111-1307 USA
042: * ----------------------------------------------------- */
043:
/**
 * Static helpers for converting between plain text and HTML/XML character
 * references, and for removing line terminators from a string.
 *
 * <p>Named entities are resolved through a table built from the HTML 4.01
 * entity list (http://www.w3.org/TR/html401/sgml/entities.html), with
 * {@code apos} added. The table is filled once in the static initializer and
 * is only read afterwards.
 *
 * @author Jason Brome <jason@methodize.org>
 * @version $Id: HTMLHelper.java,v 1.1 2003/03/24 03:13:03 jasonbrome Exp $
 */

public class HTMLHelper {

    /** Highest valid Unicode code point. */
    private static final int MAX_CODE_POINT = 0x10FFFF;

    /** First code point that needs a surrogate pair in a Java string. */
    private static final int MIN_SUPPLEMENTARY = 0x10000;

    /**
     * Maps an entity name (the text between '&' and ';') to its replacement
     * text. Keys and values are both {@code String}.
     */
    private static final Map ENTITIES = new HashMap();

    static {

        // The real value of nbsp is code point 160; it is deliberately
        // replaced with an ordinary space.
        ENTITIES.put("nbsp", " ");

        // ISO 8859-1 (Latin-1) characters
        define("iexcl", 161);
        define("cent", 162);
        define("pound", 163);
        define("curren", 164);
        define("yen", 165);
        define("brvbar", 166);
        define("sect", 167);
        define("uml", 168);
        define("copy", 169);
        define("ordf", 170);
        define("laquo", 171);
        define("not", 172);
        define("shy", 173);
        define("reg", 174);
        define("macr", 175);
        define("deg", 176);
        define("plusmn", 177);
        define("sup2", 178);
        define("sup3", 179);
        define("acute", 180);
        define("micro", 181);
        define("para", 182);
        define("middot", 183);
        define("cedil", 184);
        define("sup1", 185);
        define("ordm", 186);
        define("raquo", 187);
        define("frac14", 188);
        define("frac12", 189);
        define("frac34", 190);
        define("iquest", 191);
        define("Agrave", 192);
        define("Aacute", 193);
        define("Acirc", 194);
        define("Atilde", 195);
        define("Auml", 196);
        define("Aring", 197);
        define("AElig", 198);
        define("Ccedil", 199);
        define("Egrave", 200);
        define("Eacute", 201);
        define("Ecirc", 202);
        define("Euml", 203);
        define("Igrave", 204);
        define("Iacute", 205);
        define("Icirc", 206);
        define("Iuml", 207);
        define("ETH", 208);
        define("Ntilde", 209);
        define("Ograve", 210);
        define("Oacute", 211);
        define("Ocirc", 212);
        define("Otilde", 213);
        define("Ouml", 214);
        define("times", 215);
        define("Oslash", 216);
        define("Ugrave", 217);
        define("Uacute", 218);
        define("Ucirc", 219);
        define("Uuml", 220);
        define("Yacute", 221);
        define("THORN", 222);
        define("szlig", 223);
        define("agrave", 224);
        define("aacute", 225);
        define("acirc", 226);
        define("atilde", 227);
        define("auml", 228);
        define("aring", 229);
        define("aelig", 230);
        define("ccedil", 231);
        define("egrave", 232);
        define("eacute", 233);
        define("ecirc", 234);
        define("euml", 235);
        define("igrave", 236);
        define("iacute", 237);
        define("icirc", 238);
        define("iuml", 239);
        define("eth", 240);
        define("ntilde", 241);
        define("ograve", 242);
        define("oacute", 243);
        define("ocirc", 244);
        define("otilde", 245);
        define("ouml", 246);
        define("divide", 247);
        define("oslash", 248);
        define("ugrave", 249);
        define("uacute", 250);
        define("ucirc", 251);
        define("uuml", 252);
        define("yacute", 253);
        define("thorn", 254);
        define("yuml", 255);

        // Mathematical, Greek and Symbolic characters for HTML
        // Latin Extended-B
        define("fnof", 402);

        // Greek
        define("Alpha", 913);
        define("Beta", 914);
        define("Gamma", 915);
        define("Delta", 916);
        define("Epsilon", 917);
        define("Zeta", 918);
        define("Eta", 919);
        define("Theta", 920);
        define("Iota", 921);
        define("Kappa", 922);
        define("Lambda", 923);
        define("Mu", 924);
        define("Nu", 925);
        define("Xi", 926);
        define("Omicron", 927);
        define("Pi", 928);
        define("Rho", 929);
        define("Sigma", 931);
        define("Tau", 932);
        define("Upsilon", 933);
        define("Phi", 934);
        define("Chi", 935);
        define("Psi", 936);
        define("Omega", 937);
        define("alpha", 945);
        define("beta", 946);
        define("gamma", 947);
        define("delta", 948);
        define("epsilon", 949);
        define("zeta", 950);
        define("eta", 951);
        define("theta", 952);
        define("iota", 953);
        define("kappa", 954);
        define("lambda", 955);
        define("mu", 956);
        define("nu", 957);
        define("xi", 958);
        define("omicron", 959);
        define("pi", 960);
        define("rho", 961);
        define("sigmaf", 962);
        define("sigma", 963);
        define("tau", 964);
        define("upsilon", 965);
        define("phi", 966);
        define("chi", 967);
        define("psi", 968);
        define("omega", 969);
        define("thetasym", 977);
        define("upsih", 978);
        define("piv", 982);

        // General Punctuation
        define("bull", 8226);
        define("hellip", 8230);
        define("prime", 8242);
        define("Prime", 8243);
        define("oline", 8254);
        define("frasl", 8260);

        // Letterlike Symbols
        define("weierp", 8472);
        define("image", 8465);
        define("real", 8476);
        define("trade", 8482);
        define("alefsym", 8501);

        // Arrows
        define("larr", 8592);
        define("uarr", 8593);
        define("rarr", 8594);
        define("darr", 8595);
        define("harr", 8596);
        define("crarr", 8629);
        define("lArr", 8656);
        define("uArr", 8657);
        define("rArr", 8658);
        define("dArr", 8659);
        define("hArr", 8660);

        // Mathematical Operators
        define("forall", 8704);
        define("part", 8706);
        define("exist", 8707);
        define("empty", 8709);
        define("nabla", 8711);
        define("isin", 8712);
        define("notin", 8713);
        define("ni", 8715);
        define("prod", 8719);
        define("sum", 8721);
        define("minus", 8722);
        define("lowast", 8727);
        define("radic", 8730);
        define("prop", 8733);
        define("infin", 8734);
        define("ang", 8736);
        define("and", 8743);
        define("or", 8744);
        define("cap", 8745);
        define("cup", 8746);
        define("int", 8747);
        define("there4", 8756);
        define("sim", 8764);
        define("cong", 8773);
        define("asymp", 8776);
        define("ne", 8800);
        define("equiv", 8801);
        define("le", 8804);
        define("ge", 8805);
        define("sub", 8834);
        define("sup", 8835);
        define("nsub", 8836);
        define("sube", 8838);
        define("supe", 8839);
        define("oplus", 8853);
        define("otimes", 8855);
        define("perp", 8869);
        define("sdot", 8901);

        // Miscellaneous Technical
        define("lceil", 8968);
        define("rceil", 8969);
        define("lfloor", 8970);
        define("rfloor", 8971);
        define("lang", 9001);
        define("rang", 9002);

        // Geometric Shapes
        define("loz", 9674);

        // Miscellaneous Symbols
        define("spades", 9824);
        define("clubs", 9827);
        define("hearts", 9829);
        define("diams", 9830);

        // Special characters for HTML
        // C0 Controls and Basic Latin
        define("quot", 34);
        define("amp", 38);
        define("lt", 60);
        define("gt", 62);
        define("apos", 39);

        // Latin Extended-A
        define("OElig", 338);
        define("oelig", 339);
        define("Scaron", 352);
        define("scaron", 353);
        define("Yuml", 376);

        // Spacing Modifier Letters
        define("circ", 710);
        define("tilde", 732);

        // General Punctuation
        define("ensp", 8194);
        define("emsp", 8195);
        define("thinsp", 8201);
        define("zwnj", 8204);
        define("zwj", 8205);
        define("lrm", 8206);
        define("rlm", 8207);
        define("ndash", 8211);
        define("mdash", 8212);
        define("lsquo", 8216);
        define("rsquo", 8217);
        define("sbquo", 8218);
        define("ldquo", 8220);
        define("rdquo", 8221);
        define("bdquo", 8222);
        define("dagger", 8224);
        define("Dagger", 8225);
        define("permil", 8240);
        define("lsaquo", 8249);
        define("rsaquo", 8250);
        define("euro", 8364);

    }

    /**
     * Registers one named entity in the lookup table.
     *
     * @param name entity name without the leading '&' and trailing ';'
     * @param codePoint character the entity stands for; every caller passes a
     *        value below 0x10000, so a single {@code char} is sufficient
     */
    private static void define(String name, int codePoint) {
        ENTITIES.put(name, String.valueOf((char) codePoint));
    }

    /**
     * Replaces character references in {@code value} with the characters they
     * denote.
     *
     * <p>Three forms are recognized: named entities present in the table
     * ({@code &amp;}), decimal references ({@code &#65;}) and hexadecimal
     * references ({@code &#x41;}). Anything else - an unknown name, a
     * malformed number, or an ampersand that is not followed by a ';' before
     * the next ampersand or the end of the string - is copied to the result
     * unchanged.
     *
     * @param value text that may contain character references
     * @return {@code value} with every recognized reference replaced
     * @throws NullPointerException if {@code value} is null
     */
    public static String unescapeString(String value) {
        final int length = value.length();
        StringBuffer unescaped = new StringBuffer(length);

        int pos = 0;
        while (pos < length) {
            char c = value.charAt(pos);
            if (c != '&') {
                unescaped.append(c);
                pos++;
                continue;
            }

            // Find the end of the candidate reference. Stopping at the next
            // '&' keeps the overall scan linear and lets that ampersand
            // start a reference of its own.
            int end = pos + 1;
            while (end < length
                    && value.charAt(end) != ';'
                    && value.charAt(end) != '&') {
                end++;
            }

            String replacement = null;
            if (end < length && value.charAt(end) == ';') {
                replacement = resolveReference(value.substring(pos + 1, end));
            }

            if (replacement != null) {
                unescaped.append(replacement);
                pos = end + 1;
            } else {
                // Not a reference we understand: keep the ampersand as-is
                // and carry on with the character right after it.
                unescaped.append('&');
                pos++;
            }
        }

        return unescaped.toString();
    }

    /**
     * Resolves the text found between '&' and ';'.
     *
     * @param reference entity name, or '#' followed by a number
     * @return the replacement text, or null if the reference is not recognized
     */
    private static String resolveReference(String reference) {
        if (reference.length() > 0 && reference.charAt(0) == '#') {
            return decodeNumericReference(reference.substring(1));
        }
        return (String) ENTITIES.get(reference);
    }

    /**
     * Decodes the numeric part of a reference: decimal digits, or 'x'/'X'
     * followed by hexadecimal digits.
     *
     * @param digits the text after "&#" and before ';'
     * @return the decoded character(s), or null if the text is not a number
     *         in the range 0 to 0x10FFFF
     */
    private static String decodeNumericReference(String digits) {
        int radix = 10;
        if (digits.length() > 0
                && (digits.charAt(0) == 'x' || digits.charAt(0) == 'X')) {
            radix = 16;
            digits = digits.substring(1);
        }

        // Integer.parseInt accepts a leading sign, which is not valid in a
        // character reference, so insist on a digit up front.
        if (digits.length() == 0 || Character.digit(digits.charAt(0), radix) < 0) {
            return null;
        }

        int codePoint;
        try {
            codePoint = Integer.parseInt(digits, radix);
        } catch (NumberFormatException nfe) {
            return null;
        }

        if (codePoint > MAX_CODE_POINT) {
            return null;
        }
        if (codePoint < MIN_SUPPLEMENTARY) {
            return String.valueOf((char) codePoint);
        }

        // Characters beyond the BMP are stored as a UTF-16 surrogate pair.
        int offset = codePoint - MIN_SUPPLEMENTARY;
        char[] pair = new char[2];
        pair[0] = (char) (0xD800 + (offset >> 10));
        pair[1] = (char) (0xDC00 + (offset & 0x3FF));
        return new String(pair);
    }

    /**
     * Escapes the five markup-significant characters so that {@code value}
     * can be embedded in HTML/XML text or attribute values.
     *
     * <p>'&amp;' becomes {@code &amp;}, '&lt;' becomes {@code &lt;}, '&gt;'
     * becomes {@code &gt;}, '"' becomes {@code &quot;} and the apostrophe
     * becomes {@code &apos;}. All five names are present in the table used by
     * {@link #unescapeString(String)}, so the two methods round-trip.
     *
     * @param value text to escape
     * @return the escaped text
     * @throws NullPointerException if {@code value} is null
     */
    public static String escapeString(String value) {
        final int length = value.length();
        StringBuffer escaped = new StringBuffer(length + 16);
        for (int i = 0; i < length; i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Removes every carriage return and line feed from {@code value}.
     *
     * @param value text to process
     * @return {@code value} without any '\r' or '\n' characters
     * @throws NullPointerException if {@code value} is null
     */
    public static String stripCRLF(String value) {
        final int length = value.length();
        StringBuffer stripped = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = value.charAt(i);
            if (c != '\n' && c != '\r') {
                stripped.append(c);
            }
        }
        return stripped.toString();
    }
}
|