001: /*
002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: * Free SoftwareFoundation, Inc.
023: * 59 Temple Place, Suite 330
024: * Boston, MA 02111-1307 USA
025: *
026: * @author Scott Ferguson
027: */
028:
029: package com.caucho.xml;
030:
031: import com.caucho.util.IntMap;
032:
033: import java.io.IOException;
034:
035: class HtmlEntities extends Entities {
036: private static HtmlEntities _html40;
037: private static HtmlEntities _html32;
038:
039: protected char[][] _latin1;
040: protected char[][] _attrLatin1;
041:
042: protected char[][] _sparseEntity = new char[8192][];
043: protected char[] _sparseChar = new char[8192];
044:
045: protected IntMap _entityToChar;
046:
047: static Entities create(double version) {
048: if (version == 0 || version >= 4.0) {
049: if (_html40 == null)
050: _html40 = new HtmlEntities(4.0);
051: return _html40;
052: } else {
053: if (_html32 == null)
054: _html32 = new HtmlEntities(3.2);
055: return _html32;
056: }
057: }
058:
059: protected HtmlEntities(double version) {
060: _entityToChar = new IntMap();
061: initLatin1();
062: if (version >= 4.0) {
063: initSymbol();
064: initSpecial();
065: }
066:
067: _latin1 = new char[256][];
068: for (int i = 0; i < 32; i++) {
069: _latin1[i] = ("&#" + i + ";").toCharArray();
070: }
071: _latin1['\t'] = "\t".toCharArray();
072: _latin1['\n'] = "\n".toCharArray();
073: _latin1['\r'] = "\r".toCharArray();
074:
075: for (int i = 32; i < 127; i++)
076: _latin1[i] = ("" + (char) i).toCharArray();
077:
078: _latin1['<'] = "<".toCharArray();
079: _latin1['>'] = ">".toCharArray();
080: _latin1['&'] = "&".toCharArray();
081:
082: for (int i = 127; i < 256; i++) {
083: char[] value = getSparseEntity(i);
084:
085: if (value != null)
086: _latin1[i] = value;
087: else
088: _latin1[i] = ("&#" + i + ";").toCharArray();
089: }
090:
091: _attrLatin1 = new char[256][];
092: for (int i = 0; i < _latin1.length; i++)
093: _attrLatin1[i] = _latin1[i];
094:
095: // unquoted matches Xalan/Xerces
096: _attrLatin1['<'] = "<".toCharArray();
097: _attrLatin1['>'] = ">".toCharArray();
098: _attrLatin1['"'] = """.toCharArray();
099: _attrLatin1['\n'] = " ".toCharArray();
100: _attrLatin1['\r'] = " ".toCharArray();
101: }
102:
103: int getEntity(String entity) {
104: return _entityToChar.get(entity);
105: }
106:
107: /**
108: * Prints escaped text.
109: */
110: void printText(XmlPrinter os, char[] text, int offset, int length,
111: boolean attr) throws IOException {
112: for (int i = 0; i < length; i++) {
113: char ch = text[offset + i];
114:
115: if (ch == '&') {
116: if (i + 1 < length && text[offset + i + 1] == '{')
117: os.print('&');
118: else if (attr)
119: os.print(_attrLatin1[ch]);
120: else
121: os.print(_latin1[ch]);
122: } else if (ch < 256) {
123: if (attr)
124: os.print(_attrLatin1[ch]);
125: else
126: os.print(_latin1[ch]);
127: } else {
128: char[] value = getSparseEntity(ch);
129: if (value != null) {
130: os.print(value);
131: } else {
132: os.print("&#");
133: os.print((int) ch);
134: os.print(";");
135: }
136: }
137: }
138: }
139:
140: private void initLatin1() {
141: entity("nbsp", 160);
142: entity("iexcl", 161);
143: entity("cent", 162);
144: entity("pound", 163);
145: entity("curren", 164);
146: entity("yen", 165);
147: entity("brvbar", 166);
148: entity("sect", 167);
149: entity("uml", 168);
150: entity("copy", 169);
151: entity("ordf", 170);
152: entity("laquo", 171);
153: entity("not", 172);
154: entity("shy", 173);
155: entity("reg", 174);
156: entity("macr", 175);
157: entity("deg", 176);
158: entity("plusmn", 177);
159: entity("sup2", 178);
160: entity("sup3", 179);
161: entity("acute", 180);
162: entity("micro", 181);
163: entity("para", 182);
164: entity("middot", 183);
165: entity("cedil", 184);
166: entity("sup1", 185);
167: entity("ordm", 186);
168: entity("raquo", 187);
169: entity("frac14", 188);
170: entity("frac12", 189);
171: entity("frac34", 190);
172: entity("iquest", 191);
173: entity("Agrave", 192);
174: entity("Aacute", 193);
175: entity("Acirc", 194);
176: entity("Atilde", 195);
177: entity("Auml", 196);
178: entity("Aring", 197);
179: entity("AElig", 198);
180: entity("Ccedil", 199);
181: entity("Egrave", 200);
182: entity("Eacute", 201);
183: entity("Ecirc", 202);
184: entity("Euml", 203);
185: entity("Igrave", 204);
186: entity("Iacute", 205);
187: entity("Icirc", 206);
188: entity("Iuml", 207);
189: entity("ETH", 208);
190: entity("Ntilde", 209);
191: entity("Ograve", 210);
192: entity("Oacute", 211);
193: entity("Ocirc", 212);
194: entity("Otilde", 213);
195: entity("Ouml", 214);
196: entity("times", 215);
197: entity("Oslash", 216);
198: entity("Ugrave", 217);
199: entity("Uacute", 218);
200: entity("Ucirc", 219);
201: entity("Uuml", 220);
202: entity("Yacute", 221);
203: entity("THORN", 222);
204: entity("szlig", 223);
205: entity("agrave", 224);
206: entity("aacute", 225);
207: entity("acirc", 226);
208: entity("atilde", 227);
209: entity("auml", 228);
210: entity("aring", 229);
211: entity("aelig", 230);
212: entity("ccedil", 231);
213: entity("egrave", 232);
214: entity("eacute", 233);
215: entity("ecirc", 234);
216: entity("euml", 235);
217: entity("igrave", 236);
218: entity("iacute", 237);
219: entity("icirc", 238);
220: entity("iuml", 239);
221: entity("eth", 240);
222: entity("ntilde", 241);
223: entity("ograve", 242);
224: entity("oacute", 243);
225: entity("ocirc", 244);
226: entity("otilde", 245);
227: entity("ouml", 246);
228: entity("divide", 247);
229: entity("oslash", 248);
230: entity("ugrave", 249);
231: entity("uacute", 250);
232: entity("ucirc", 251);
233: entity("uuml", 252);
234: entity("yacute", 253);
235: entity("thorn", 254);
236: entity("yuml", 255);
237: }
238:
239: private void initSymbol() {
240: entity("fnof", 402);
241: entity("Alpha", 913);
242: entity("Beta", 914);
243: entity("Gamma", 915);
244: entity("Delta", 916);
245: entity("Epsilon", 917);
246: entity("Zeta", 918);
247: entity("Eta", 919);
248: entity("Theta", 920);
249: entity("Iota", 921);
250: entity("Kappa", 922);
251: entity("Lambda", 923);
252: entity("Mu", 924);
253: entity("Nu", 925);
254: entity("Xi", 926);
255: entity("Omicron", 927);
256: entity("Pi", 928);
257: entity("Rho", 929);
258: entity("Sigma", 931);
259: entity("Tau", 932);
260: entity("Upsilon", 933);
261: entity("Phi", 934);
262: entity("Chi", 935);
263: entity("Psi", 936);
264: entity("Omega", 937);
265: entity("alpha", 945);
266: entity("beta", 946);
267: entity("gamma", 947);
268: entity("delta", 948);
269: entity("epsilon", 949);
270: entity("zeta", 950);
271: entity("eta", 951);
272: entity("theta", 952);
273: entity("iota", 953);
274: entity("kappa", 954);
275: entity("lambda", 955);
276: entity("mu", 956);
277: entity("nu", 957);
278: entity("xi", 958);
279: entity("omicron", 959);
280: entity("pi", 960);
281: entity("rho", 961);
282: entity("sigmaf", 962);
283: entity("sigma", 963);
284: entity("tau", 964);
285: entity("upsilon", 965);
286: entity("phi", 966);
287: entity("chi", 967);
288: entity("psi", 968);
289: entity("omega", 969);
290: entity("thetasym", 977);
291: entity("upsih", 978);
292: entity("piv", 982);
293:
294: entity("bull", 8226);
295: entity("hellip", 8230);
296: entity("prime", 8242);
297: entity("Prime", 8243);
298: entity("oline", 8254);
299: entity("frasl", 8260);
300: entity("weirp", 8472);
301: entity("image", 8465);
302: entity("real", 8476);
303: entity("trade", 8482);
304: entity("alefsym", 8501);
305:
306: entity("larr", 8592);
307: entity("uarr", 8593);
308: entity("rarr", 8594);
309: entity("darr", 8595);
310: entity("harr", 8596);
311: entity("crarr", 8629);
312: entity("lArr", 8656);
313: entity("uArr", 8657);
314: entity("rArr", 8658);
315: entity("dArr", 8659);
316: entity("hArr", 8660);
317:
318: entity("forall", 8704);
319: entity("part", 8706);
320: entity("exist", 8707);
321: entity("empty", 8709);
322: entity("nabla", 8711);
323: entity("isin", 8712);
324: entity("ni", 8715);
325: entity("prod", 8719);
326: entity("sum", 8721);
327: entity("minus", 8722);
328: entity("lowas", 8727);
329: entity("radic", 8730);
330: entity("prop", 8733);
331: entity("infin", 8734);
332: entity("ang", 8736);
333: entity("and", 8743);
334: entity("or", 8744);
335: entity("cap", 8745);
336: entity("cup", 8746);
337: entity("int", 8747);
338: entity("there4", 8756);
339: entity("sim", 8764);
340: entity("cong", 8773);
341: entity("asymp", 8776);
342: entity("ne", 8800);
343: entity("equiv", 8801);
344: entity("le", 8804);
345: entity("ge", 8805);
346: entity("sub", 8834);
347: entity("sup", 8835);
348: entity("nsub", 8836);
349: entity("sube", 8838);
350: entity("supe", 8839);
351: entity("oplus", 8853);
352: entity("otimes", 8855);
353: entity("perp", 8869);
354: entity("sdot", 8901);
355: entity("lceil", 8968);
356: entity("rceil", 8969);
357: entity("lfloor", 8970);
358: entity("rfloor", 8971);
359: entity("lang", 9001);
360: entity("rang", 9002);
361:
362: entity("loz", 9674);
363: entity("spades", 9824);
364: entity("clubs", 9827);
365: entity("hearts", 9829);
366: entity("diams", 9830);
367: }
368:
369: private void initSpecial() {
370: entity("quot", 34);
371: entity("amp", 38);
372: entity("lt", 60);
373: entity("gt", 62);
374: entity("apos", '\'');
375: entity("OElig", 338);
376: entity("oelig", 339);
377: entity("Scaron", 352);
378: entity("scaron", 353);
379: entity("Yuml", 376);
380: entity("circ", 710);
381: entity("tilde", 732);
382: entity("ensp", 8194);
383: entity("emsp", 8195);
384: entity("thinsp", 8201);
385: entity("zwnj", 8204);
386: entity("zwj", 8205);
387: entity("lrm", 8206);
388: entity("rlm", 8207);
389: entity("ndash", 8211);
390: entity("mdash", 8212);
391: entity("lsquo", 8216);
392: entity("rsquo", 8217);
393: entity("sbquo", 8218);
394: entity("ldquo", 8220);
395: entity("rdquo", 8221);
396: entity("bdquo", 8222);
397: entity("dagger", 8224);
398: entity("Dagger", 8225);
399: entity("permil", 8240);
400: entity("lsaquo", 8249);
401: entity("rsaquo", 8250);
402: entity("euro", 8364);
403: }
404:
405: /**
406: * Returns the character entity for the given character. The
407: * map is sparse.
408: */
409: protected char[] getSparseEntity(int ch) {
410: int size = _sparseChar.length;
411:
412: int i = (ch * 65521) % size;
413: if (i < 0)
414: i = -i;
415: for (; _sparseChar[i] != ch && _sparseEntity[i] != null; i = (i + 1)
416: % size) {
417: }
418:
419: return _sparseEntity[i];
420: }
421:
422: private void entity(String name, int ch) {
423: _entityToChar.put(name, ch);
424:
425: int size = _sparseChar.length;
426:
427: int i = (ch * 65521) % size;
428: if (i < 0)
429: i = -i;
430: for (; _sparseChar[i] != ch && _sparseEntity[i] != null; i = (i + 1)
431: % size) {
432: }
433:
434: _sparseChar[i] = (char) ch;
435: _sparseEntity[i] = ("&" + name + ";").toCharArray();
436: }
437: }
|