001: /*
002: * @(#)EntityTable.java 1.11 2000/08/16
003: *
004: */
005:
006: package org.w3c.tidy;
007:
008: /**
009: *
010: * Entity hash table
011: *
012: * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
013: * See Tidy.java for the copyright notice.
014: * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
015: * HTML Tidy Release 4 Aug 2000</a>
016: *
017: * @author Dave Raggett <dsr@w3.org>
018: * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
019: * @version 1.0, 1999/05/22
020: * @version 1.0.1, 1999/05/29
021: * @version 1.1, 1999/06/18 Java Bean
022: * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
023: * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
024: * @version 1.4, 1999/09/04 DOM support
025: * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
026: * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
027: * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
028: * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
029: * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
030: * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
031: * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
032: */
033:
034: import java.util.Hashtable;
035: import java.util.Enumeration;
036:
037: public class EntityTable {
038:
039: public EntityTable() {
040: }
041:
042: public Entity lookup(String name) {
043: return (Entity) entityHashtable.get(name);
044: }
045:
046: public Entity install(String name, short code) {
047: Entity ent = lookup(name);
048: if (ent == null) {
049: ent = new Entity(name, code);
050: entityHashtable.put(name, ent);
051: } else {
052: ent.code = code;
053: }
054: return ent;
055: }
056:
057: public Entity install(Entity ent) {
058: return (Entity) entityHashtable.put(ent.name, ent);
059: }
060:
061: /* entity starting with "&" returns zero on error */
062: public short entityCode(String name) {
063: int c;
064:
065: if (name.length() <= 1)
066: return 0;
067:
068: /* numeric entitity: name = "&#" followed by number */
069: if (name.charAt(1) == '#') {
070: c = 0; /* zero on missing/bad number */
071:
072: /* 'x' prefix denotes hexadecimal number format */
073: try {
074: if (name.length() >= 4 && name.charAt(2) == 'x') {
075: c = Integer.parseInt(name.substring(3), 16);
076: } else if (name.length() >= 3) {
077: c = Integer.parseInt(name.substring(2));
078: }
079: } catch (NumberFormatException e) {
080: }
081:
082: return (short) c;
083: }
084:
085: /* Named entity: name ="&" followed by a name */
086: Entity ent = lookup(name.substring(1));
087: if (ent != null) {
088: return ent.code;
089: }
090:
091: return 0; /* zero signifies unknown entity name */
092: }
093:
094: public String entityName(short code) {
095: String name = null;
096: Entity ent;
097: Enumeration en = entityHashtable.elements();
098: while (en.hasMoreElements()) {
099: ent = (Entity) en.nextElement();
100: if (ent.code == code) {
101: name = ent.name;
102: break;
103: }
104: }
105: return name;
106: }
107:
108: private Hashtable entityHashtable = new Hashtable();
109:
110: private static EntityTable defaultEntityTable = null;
111:
112: private static Entity[] entities = {
113:
114: new Entity("nbsp", 160), new Entity("iexcl", 161),
115: new Entity("cent", 162), new Entity("pound", 163),
116: new Entity("curren", 164), new Entity("yen", 165),
117: new Entity("brvbar", 166), new Entity("sect", 167),
118: new Entity("uml", 168), new Entity("copy", 169),
119: new Entity("ordf", 170), new Entity("laquo", 171),
120: new Entity("not", 172), new Entity("shy", 173),
121: new Entity("reg", 174), new Entity("macr", 175),
122: new Entity("deg", 176), new Entity("plusmn", 177),
123: new Entity("sup2", 178), new Entity("sup3", 179),
124: new Entity("acute", 180), new Entity("micro", 181),
125: new Entity("para", 182), new Entity("middot", 183),
126: new Entity("cedil", 184), new Entity("sup1", 185),
127: new Entity("ordm", 186), new Entity("raquo", 187),
128: new Entity("frac14", 188), new Entity("frac12", 189),
129: new Entity("frac34", 190), new Entity("iquest", 191),
130: new Entity("Agrave", 192), new Entity("Aacute", 193),
131: new Entity("Acirc", 194), new Entity("Atilde", 195),
132: new Entity("Auml", 196), new Entity("Aring", 197),
133: new Entity("AElig", 198), new Entity("Ccedil", 199),
134: new Entity("Egrave", 200), new Entity("Eacute", 201),
135: new Entity("Ecirc", 202), new Entity("Euml", 203),
136: new Entity("Igrave", 204), new Entity("Iacute", 205),
137: new Entity("Icirc", 206), new Entity("Iuml", 207),
138: new Entity("ETH", 208), new Entity("Ntilde", 209),
139: new Entity("Ograve", 210), new Entity("Oacute", 211),
140: new Entity("Ocirc", 212), new Entity("Otilde", 213),
141: new Entity("Ouml", 214), new Entity("times", 215),
142: new Entity("Oslash", 216), new Entity("Ugrave", 217),
143: new Entity("Uacute", 218), new Entity("Ucirc", 219),
144: new Entity("Uuml", 220), new Entity("Yacute", 221),
145: new Entity("THORN", 222), new Entity("szlig", 223),
146: new Entity("agrave", 224), new Entity("aacute", 225),
147: new Entity("acirc", 226), new Entity("atilde", 227),
148: new Entity("auml", 228), new Entity("aring", 229),
149: new Entity("aelig", 230), new Entity("ccedil", 231),
150: new Entity("egrave", 232), new Entity("eacute", 233),
151: new Entity("ecirc", 234), new Entity("euml", 235),
152: new Entity("igrave", 236), new Entity("iacute", 237),
153: new Entity("icirc", 238), new Entity("iuml", 239),
154: new Entity("eth", 240), new Entity("ntilde", 241),
155: new Entity("ograve", 242), new Entity("oacute", 243),
156: new Entity("ocirc", 244), new Entity("otilde", 245),
157: new Entity("ouml", 246), new Entity("divide", 247),
158: new Entity("oslash", 248), new Entity("ugrave", 249),
159: new Entity("uacute", 250), new Entity("ucirc", 251),
160: new Entity("uuml", 252), new Entity("yacute", 253),
161: new Entity("thorn", 254), new Entity("yuml", 255),
162: new Entity("fnof", 402), new Entity("Alpha", 913),
163: new Entity("Beta", 914), new Entity("Gamma", 915),
164: new Entity("Delta", 916), new Entity("Epsilon", 917),
165: new Entity("Zeta", 918), new Entity("Eta", 919),
166: new Entity("Theta", 920), new Entity("Iota", 921),
167: new Entity("Kappa", 922), new Entity("Lambda", 923),
168: new Entity("Mu", 924), new Entity("Nu", 925),
169: new Entity("Xi", 926), new Entity("Omicron", 927),
170: new Entity("Pi", 928), new Entity("Rho", 929),
171: new Entity("Sigma", 931), new Entity("Tau", 932),
172: new Entity("Upsilon", 933), new Entity("Phi", 934),
173: new Entity("Chi", 935), new Entity("Psi", 936),
174: new Entity("Omega", 937), new Entity("alpha", 945),
175: new Entity("beta", 946), new Entity("gamma", 947),
176: new Entity("delta", 948), new Entity("epsilon", 949),
177: new Entity("zeta", 950), new Entity("eta", 951),
178: new Entity("theta", 952), new Entity("iota", 953),
179: new Entity("kappa", 954), new Entity("lambda", 955),
180: new Entity("mu", 956), new Entity("nu", 957),
181: new Entity("xi", 958), new Entity("omicron", 959),
182: new Entity("pi", 960), new Entity("rho", 961),
183: new Entity("sigmaf", 962), new Entity("sigma", 963),
184: new Entity("tau", 964), new Entity("upsilon", 965),
185: new Entity("phi", 966), new Entity("chi", 967),
186: new Entity("psi", 968), new Entity("omega", 969),
187: new Entity("thetasym", 977), new Entity("upsih", 978),
188: new Entity("piv", 982), new Entity("bull", 8226),
189: new Entity("hellip", 8230), new Entity("prime", 8242),
190: new Entity("Prime", 8243), new Entity("oline", 8254),
191: new Entity("frasl", 8260), new Entity("weierp", 8472),
192: new Entity("image", 8465), new Entity("real", 8476),
193: new Entity("trade", 8482), new Entity("alefsym", 8501),
194: new Entity("larr", 8592), new Entity("uarr", 8593),
195: new Entity("rarr", 8594), new Entity("darr", 8595),
196: new Entity("harr", 8596), new Entity("crarr", 8629),
197: new Entity("lArr", 8656), new Entity("uArr", 8657),
198: new Entity("rArr", 8658), new Entity("dArr", 8659),
199: new Entity("hArr", 8660), new Entity("forall", 8704),
200: new Entity("part", 8706), new Entity("exist", 8707),
201: new Entity("empty", 8709), new Entity("nabla", 8711),
202: new Entity("isin", 8712), new Entity("notin", 8713),
203: new Entity("ni", 8715), new Entity("prod", 8719),
204: new Entity("sum", 8721), new Entity("minus", 8722),
205: new Entity("lowast", 8727), new Entity("radic", 8730),
206: new Entity("prop", 8733), new Entity("infin", 8734),
207: new Entity("ang", 8736), new Entity("and", 8743),
208: new Entity("or", 8744), new Entity("cap", 8745),
209: new Entity("cup", 8746), new Entity("int", 8747),
210: new Entity("there4", 8756), new Entity("sim", 8764),
211: new Entity("cong", 8773), new Entity("asymp", 8776),
212: new Entity("ne", 8800), new Entity("equiv", 8801),
213: new Entity("le", 8804), new Entity("ge", 8805),
214: new Entity("sub", 8834), new Entity("sup", 8835),
215: new Entity("nsub", 8836), new Entity("sube", 8838),
216: new Entity("supe", 8839), new Entity("oplus", 8853),
217: new Entity("otimes", 8855), new Entity("perp", 8869),
218: new Entity("sdot", 8901), new Entity("lceil", 8968),
219: new Entity("rceil", 8969), new Entity("lfloor", 8970),
220: new Entity("rfloor", 8971), new Entity("lang", 9001),
221: new Entity("rang", 9002), new Entity("loz", 9674),
222: new Entity("spades", 9824), new Entity("clubs", 9827),
223: new Entity("hearts", 9829), new Entity("diams", 9830),
224: new Entity("quot", 34), new Entity("amp", 38),
225: new Entity("lt", 60), new Entity("gt", 62),
226: new Entity("OElig", 338), new Entity("oelig", 339),
227: new Entity("Scaron", 352), new Entity("scaron", 353),
228: new Entity("Yuml", 376), new Entity("circ", 710),
229: new Entity("tilde", 732), new Entity("ensp", 8194),
230: new Entity("emsp", 8195), new Entity("thinsp", 8201),
231: new Entity("zwnj", 8204), new Entity("zwj", 8205),
232: new Entity("lrm", 8206), new Entity("rlm", 8207),
233: new Entity("ndash", 8211), new Entity("mdash", 8212),
234: new Entity("lsquo", 8216), new Entity("rsquo", 8217),
235: new Entity("sbquo", 8218), new Entity("ldquo", 8220),
236: new Entity("rdquo", 8221), new Entity("bdquo", 8222),
237: new Entity("dagger", 8224), new Entity("Dagger", 8225),
238: new Entity("permil", 8240), new Entity("lsaquo", 8249),
239: new Entity("rsaquo", 8250), new Entity("euro", 8364)
240:
241: };
242:
243: public static EntityTable getDefaultEntityTable() {
244: if (defaultEntityTable == null) {
245: defaultEntityTable = new EntityTable();
246: for (int i = 0; i < entities.length; i++) {
247: defaultEntityTable.install(entities[i]);
248: }
249: }
250: return defaultEntityTable;
251: }
252:
253: }
|