001: /*
002: * Java HTML Tidy - JTidy
003: * HTML parser and pretty printer
004: *
005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
006: * Institute of Technology, Institut National de Recherche en
007: * Informatique et en Automatique, Keio University). All Rights
008: * Reserved.
009: *
010: * Contributing Author(s):
011: *
012: * Dave Raggett <dsr@w3.org>
013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
014: * Gary L Peskin <garyp@firstech.com> (Java development)
015: * Sami Lempinen <sami@lempinen.net> (release management)
016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
017: *
018: * The contributing author(s) would like to thank all those who
019: * helped with testing, bug fixes, and patience. This wouldn't
020: * have been possible without all of you.
021: *
022: * COPYRIGHT NOTICE:
023: *
024: * This software and documentation is provided "as is," and
025: * the copyright holders and contributing author(s) make no
026: * representations or warranties, express or implied, including
027: * but not limited to, warranties of merchantability or fitness
028: * for any particular purpose or that the use of the software or
029: * documentation will not infringe any third party patents,
030: * copyrights, trademarks or other rights.
031: *
032: * The copyright holders and contributing author(s) will not be
033: * liable for any direct, indirect, special or consequential damages
034: * arising out of any use of the software or documentation, even if
035: * advised of the possibility of such damage.
036: *
037: * Permission is hereby granted to use, copy, modify, and distribute
038: * this source code, or portions hereof, documentation and executables,
039: * for any purpose, without fee, subject to the following restrictions:
040: *
041: * 1. The origin of this source code must not be misrepresented.
042: * 2. Altered versions must be plainly marked as such and must
043: * not be misrepresented as being the original source.
044: * 3. This Copyright notice may not be removed or altered from any
045: * source or altered source distribution.
046: *
047: * The copyright holders and contributing author(s) specifically
048: * permit, without fee, and encourage the use of this source code
049: * as a component for supporting the Hypertext Markup Language in
050: * commercial products. If you use this source code in a product,
051: * acknowledgment is not required but would be appreciated.
052: *
053: */
054: package org.w3c.tidy;
055:
056: import java.util.Hashtable;
057: import java.util.Iterator;
058: import java.util.Map;
059:
060: /**
061: * Entity hash table.
062: * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
063: * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
064: * @author Fabrizio Giustina
065: * @version $Revision: 1.10 $ ($Author: fgiust $)
066: */
067: public final class EntityTable {
068:
069: /**
070: * the default entity table.
071: */
072: private static EntityTable defaultEntityTable;
073:
074: /**
075: * Known entities.
076: */
077: private static Entity[] entities = { new Entity("nbsp", 160),
078: new Entity("iexcl", 161), new Entity("cent", 162),
079: new Entity("pound", 163), new Entity("curren", 164),
080: new Entity("yen", 165), new Entity("brvbar", 166),
081: new Entity("sect", 167), new Entity("uml", 168),
082: new Entity("copy", 169), new Entity("ordf", 170),
083: new Entity("laquo", 171), new Entity("not", 172),
084: new Entity("shy", 173), new Entity("reg", 174),
085: new Entity("macr", 175), new Entity("deg", 176),
086: new Entity("plusmn", 177), new Entity("sup2", 178),
087: new Entity("sup3", 179), new Entity("acute", 180),
088: new Entity("micro", 181), new Entity("para", 182),
089: new Entity("middot", 183), new Entity("cedil", 184),
090: new Entity("sup1", 185), new Entity("ordm", 186),
091: new Entity("raquo", 187), new Entity("frac14", 188),
092: new Entity("frac12", 189), new Entity("frac34", 190),
093: new Entity("iquest", 191), new Entity("Agrave", 192),
094: new Entity("Aacute", 193), new Entity("Acirc", 194),
095: new Entity("Atilde", 195), new Entity("Auml", 196),
096: new Entity("Aring", 197), new Entity("AElig", 198),
097: new Entity("Ccedil", 199), new Entity("Egrave", 200),
098: new Entity("Eacute", 201), new Entity("Ecirc", 202),
099: new Entity("Euml", 203), new Entity("Igrave", 204),
100: new Entity("Iacute", 205), new Entity("Icirc", 206),
101: new Entity("Iuml", 207), new Entity("ETH", 208),
102: new Entity("Ntilde", 209), new Entity("Ograve", 210),
103: new Entity("Oacute", 211), new Entity("Ocirc", 212),
104: new Entity("Otilde", 213), new Entity("Ouml", 214),
105: new Entity("times", 215), new Entity("Oslash", 216),
106: new Entity("Ugrave", 217), new Entity("Uacute", 218),
107: new Entity("Ucirc", 219), new Entity("Uuml", 220),
108: new Entity("Yacute", 221), new Entity("THORN", 222),
109: new Entity("szlig", 223), new Entity("agrave", 224),
110: new Entity("aacute", 225), new Entity("acirc", 226),
111: new Entity("atilde", 227), new Entity("auml", 228),
112: new Entity("aring", 229), new Entity("aelig", 230),
113: new Entity("ccedil", 231), new Entity("egrave", 232),
114: new Entity("eacute", 233), new Entity("ecirc", 234),
115: new Entity("euml", 235), new Entity("igrave", 236),
116: new Entity("iacute", 237), new Entity("icirc", 238),
117: new Entity("iuml", 239), new Entity("eth", 240),
118: new Entity("ntilde", 241), new Entity("ograve", 242),
119: new Entity("oacute", 243), new Entity("ocirc", 244),
120: new Entity("otilde", 245), new Entity("ouml", 246),
121: new Entity("divide", 247), new Entity("oslash", 248),
122: new Entity("ugrave", 249), new Entity("uacute", 250),
123: new Entity("ucirc", 251), new Entity("uuml", 252),
124: new Entity("yacute", 253), new Entity("thorn", 254),
125: new Entity("yuml", 255), new Entity("fnof", 402),
126: new Entity("Alpha", 913), new Entity("Beta", 914),
127: new Entity("Gamma", 915), new Entity("Delta", 916),
128: new Entity("Epsilon", 917), new Entity("Zeta", 918),
129: new Entity("Eta", 919), new Entity("Theta", 920),
130: new Entity("Iota", 921), new Entity("Kappa", 922),
131: new Entity("Lambda", 923), new Entity("Mu", 924),
132: new Entity("Nu", 925), new Entity("Xi", 926),
133: new Entity("Omicron", 927), new Entity("Pi", 928),
134: new Entity("Rho", 929), new Entity("Sigma", 931),
135: new Entity("Tau", 932), new Entity("Upsilon", 933),
136: new Entity("Phi", 934), new Entity("Chi", 935),
137: new Entity("Psi", 936), new Entity("Omega", 937),
138: new Entity("alpha", 945), new Entity("beta", 946),
139: new Entity("gamma", 947), new Entity("delta", 948),
140: new Entity("epsilon", 949), new Entity("zeta", 950),
141: new Entity("eta", 951), new Entity("theta", 952),
142: new Entity("iota", 953), new Entity("kappa", 954),
143: new Entity("lambda", 955), new Entity("mu", 956),
144: new Entity("nu", 957), new Entity("xi", 958),
145: new Entity("omicron", 959), new Entity("pi", 960),
146: new Entity("rho", 961), new Entity("sigmaf", 962),
147: new Entity("sigma", 963), new Entity("tau", 964),
148: new Entity("upsilon", 965), new Entity("phi", 966),
149: new Entity("chi", 967), new Entity("psi", 968),
150: new Entity("omega", 969), new Entity("thetasym", 977),
151: new Entity("upsih", 978), new Entity("piv", 982),
152: new Entity("bull", 8226), new Entity("hellip", 8230),
153: new Entity("prime", 8242), new Entity("Prime", 8243),
154: new Entity("oline", 8254), new Entity("frasl", 8260),
155: new Entity("weierp", 8472), new Entity("image", 8465),
156: new Entity("real", 8476), new Entity("trade", 8482),
157: new Entity("alefsym", 8501), new Entity("larr", 8592),
158: new Entity("uarr", 8593), new Entity("rarr", 8594),
159: new Entity("darr", 8595), new Entity("harr", 8596),
160: new Entity("crarr", 8629), new Entity("lArr", 8656),
161: new Entity("uArr", 8657), new Entity("rArr", 8658),
162: new Entity("dArr", 8659), new Entity("hArr", 8660),
163: new Entity("forall", 8704), new Entity("part", 8706),
164: new Entity("exist", 8707), new Entity("empty", 8709),
165: new Entity("nabla", 8711), new Entity("isin", 8712),
166: new Entity("notin", 8713), new Entity("ni", 8715),
167: new Entity("prod", 8719), new Entity("sum", 8721),
168: new Entity("minus", 8722), new Entity("lowast", 8727),
169: new Entity("radic", 8730), new Entity("prop", 8733),
170: new Entity("infin", 8734), new Entity("ang", 8736),
171: new Entity("and", 8743), new Entity("or", 8744),
172: new Entity("cap", 8745), new Entity("cup", 8746),
173: new Entity("int", 8747), new Entity("there4", 8756),
174: new Entity("sim", 8764), new Entity("cong", 8773),
175: new Entity("asymp", 8776), new Entity("ne", 8800),
176: new Entity("equiv", 8801), new Entity("le", 8804),
177: new Entity("ge", 8805), new Entity("sub", 8834),
178: new Entity("sup", 8835), new Entity("nsub", 8836),
179: new Entity("sube", 8838), new Entity("supe", 8839),
180: new Entity("oplus", 8853), new Entity("otimes", 8855),
181: new Entity("perp", 8869), new Entity("sdot", 8901),
182: new Entity("lceil", 8968), new Entity("rceil", 8969),
183: new Entity("lfloor", 8970), new Entity("rfloor", 8971),
184: new Entity("lang", 9001), new Entity("rang", 9002),
185: new Entity("loz", 9674), new Entity("spades", 9824),
186: new Entity("clubs", 9827), new Entity("hearts", 9829),
187: new Entity("diams", 9830), new Entity("quot", 34),
188: new Entity("amp", 38), new Entity("lt", 60),
189: new Entity("gt", 62), new Entity("OElig", 338),
190: new Entity("oelig", 339), new Entity("Scaron", 352),
191: new Entity("scaron", 353), new Entity("Yuml", 376),
192: new Entity("circ", 710), new Entity("tilde", 732),
193: new Entity("ensp", 8194), new Entity("emsp", 8195),
194: new Entity("thinsp", 8201), new Entity("zwnj", 8204),
195: new Entity("zwj", 8205), new Entity("lrm", 8206),
196: new Entity("rlm", 8207), new Entity("ndash", 8211),
197: new Entity("mdash", 8212), new Entity("lsquo", 8216),
198: new Entity("rsquo", 8217), new Entity("sbquo", 8218),
199: new Entity("ldquo", 8220), new Entity("rdquo", 8221),
200: new Entity("bdquo", 8222), new Entity("dagger", 8224),
201: new Entity("Dagger", 8225), new Entity("permil", 8240),
202: new Entity("lsaquo", 8249), new Entity("rsaquo", 8250),
203: new Entity("euro", 8364) };
204:
205: /**
206: * Entity map.
207: */
208: private Map entityHashtable = new Hashtable();
209:
210: /**
211: * use getDefaultEntityTable to get an entity table instance.
212: */
213: private EntityTable() {
214: super ();
215: }
216:
217: /**
218: * installs an entity.
219: * @param ent entity
220: * @return installed Entity
221: */
222: private Entity install(Entity ent) {
223: return (Entity) this .entityHashtable.put(ent.getName(), ent);
224: }
225:
226: /**
227: * Lookup an entity by its name.
228: * @param name entity name
229: * @return entity
230: */
231: public Entity lookup(String name) {
232: return (Entity) this .entityHashtable.get(name);
233: }
234:
235: /**
236: * Returns the entity code for the given entity name.
237: * @param name entity name
238: * @return entity code or 0 for unknown entity names
239: */
240: public int entityCode(String name) {
241: // entity starting with "&" returns zero on error.
242: int c;
243:
244: if (name.length() <= 1) {
245: return 0;
246: }
247:
248: // numeric entitity: name = "&#" followed by number
249: if (name.charAt(1) == '#') {
250: c = 0; // zero on missing/bad number
251:
252: // 'x' prefix denotes hexadecimal number format
253: try {
254: if (name.length() >= 4 && name.charAt(2) == 'x') {
255: c = Integer.parseInt(name.substring(3), 16);
256: } else if (name.length() >= 3) {
257: c = Integer.parseInt(name.substring(2));
258: }
259: } catch (NumberFormatException e) {
260: // ignore
261: }
262:
263: return c;
264: }
265:
266: // Named entity: name ="&" followed by a name
267: Entity ent = lookup(name.substring(1));
268: if (ent != null) {
269: return ent.getCode();
270: }
271:
272: return 0; // zero signifies unknown entity name
273: }
274:
275: /**
276: * Returns the entity name for the given entity code.
277: * @param code entity code
278: * @return entity name or null for unknown entity codes
279: */
280: public String entityName(short code) {
281: String name = null;
282: Entity ent;
283: Iterator en = this .entityHashtable.values().iterator();
284: while (en.hasNext()) {
285: ent = (Entity) en.next();
286: if (ent.getCode() == code) {
287: name = ent.getName();
288: break;
289: }
290: }
291: return name;
292: }
293:
294: /**
295: * Returns the default entity table instance.
296: * @return entity table instance
297: */
298: public static EntityTable getDefaultEntityTable() {
299: if (defaultEntityTable == null) {
300: defaultEntityTable = new EntityTable();
301: for (int i = 0; i < entities.length; i++) {
302: defaultEntityTable.install(entities[i]);
303: }
304: }
305: return defaultEntityTable;
306: }
307:
308: }
|