0001: /*
0002: * Java HTML Tidy - JTidy
0003: * HTML parser and pretty printer
0004: *
0005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
0006: * Institute of Technology, Institut National de Recherche en
0007: * Informatique et en Automatique, Keio University). All Rights
0008: * Reserved.
0009: *
0010: * Contributing Author(s):
0011: *
0012: * Dave Raggett <dsr@w3.org>
0013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
0014: * Gary L Peskin <garyp@firstech.com> (Java development)
0015: * Sami Lempinen <sami@lempinen.net> (release management)
0016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
0017: *
0018: * The contributing author(s) would like to thank all those who
0019: * helped with testing, bug fixes, and patience. This wouldn't
0020: * have been possible without all of you.
0021: *
0022: * COPYRIGHT NOTICE:
0023: *
0024: * This software and documentation is provided "as is," and
0025: * the copyright holders and contributing author(s) make no
0026: * representations or warranties, express or implied, including
0027: * but not limited to, warranties of merchantability or fitness
0028: * for any particular purpose or that the use of the software or
0029: * documentation will not infringe any third party patents,
0030: * copyrights, trademarks or other rights.
0031: *
0032: * The copyright holders and contributing author(s) will not be
0033: * liable for any direct, indirect, special or consequential damages
0034: * arising out of any use of the software or documentation, even if
0035: * advised of the possibility of such damage.
0036: *
0037: * Permission is hereby granted to use, copy, modify, and distribute
0038: * this source code, or portions hereof, documentation and executables,
0039: * for any purpose, without fee, subject to the following restrictions:
0040: *
0041: * 1. The origin of this source code must not be misrepresented.
0042: * 2. Altered versions must be plainly marked as such and must
0043: * not be misrepresented as being the original source.
0044: * 3. This Copyright notice may not be removed or altered from any
0045: * source or altered source distribution.
0046: *
0047: * The copyright holders and contributing author(s) specifically
0048: * permit, without fee, and encourage the use of this source code
0049: * as a component for supporting the Hypertext Markup Language in
0050: * commercial products. If you use this source code in a product,
0051: * acknowledgment is not required but would be appreciated.
0052: *
0053: */
0054: package org.w3c.tidy;
0055:
0056: import java.util.ArrayList;
0057: import java.util.Hashtable;
0058: import java.util.Iterator;
0059: import java.util.List;
0060: import java.util.Map;
0061:
0062: /**
0063: * Tag dictionary node hash table.
0064: * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
0065: * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
0066: * @author Fabrizio Giustina
0067: * @version $Revision: 1.31 $ ($Author: fgiust $)
0068: */
0069: public final class TagTable {
0070:
0071: /**
0072: * dummy entry for all xml tags.
0073: */
0074: public static final Dict XML_TAGS = new Dict(null, Dict.VERS_ALL,
0075: Dict.CM_BLOCK, null, null);
0076:
0077: /**
0078: * all the known tags.
0079: */
0080: private static final Dict[] TAGS = {
0081: new Dict("html", Dict.VERS_ALL,
0082: (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST),
0083: ParserImpl.HTML, TagCheckImpl.HTML),
0084: new Dict("head", Dict.VERS_ALL,
0085: (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST),
0086: ParserImpl.HEAD, null),
0087: new Dict("title", Dict.VERS_ALL, Dict.CM_HEAD,
0088: ParserImpl.TITLE, null),
0089: new Dict("base", Dict.VERS_ALL,
0090: (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY,
0091: null),
0092: new Dict("link", Dict.VERS_ALL,
0093: (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY,
0094: TagCheckImpl.LINK),
0095: new Dict("meta", Dict.VERS_ALL,
0096: (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY,
0097: TagCheckImpl.META),
0098: new Dict("style",
0099: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0100: Dict.CM_HEAD, ParserImpl.SCRIPT, TagCheckImpl.STYLE),
0101: new Dict(
0102: "script",
0103: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0104: (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
0105: ParserImpl.SCRIPT, TagCheckImpl.SCRIPT),
0106: new Dict("server", Dict.VERS_NETSCAPE, (Dict.CM_HEAD
0107: | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
0108: ParserImpl.SCRIPT, null),
0109: new Dict("body", Dict.VERS_ALL,
0110: (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST),
0111: ParserImpl.BODY, null),
0112: new Dict("frameset", Dict.VERS_FRAMESET,
0113: (Dict.CM_HTML | Dict.CM_FRAMES),
0114: ParserImpl.FRAMESET, null),
0115: new Dict("p", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OPT),
0116: ParserImpl.INLINE, null),
0117: new Dict("h1", Dict.VERS_ALL,
0118: (Dict.CM_BLOCK | Dict.CM_HEADING),
0119: ParserImpl.INLINE, null),
0120: new Dict("h2", Dict.VERS_ALL,
0121: (Dict.CM_BLOCK | Dict.CM_HEADING),
0122: ParserImpl.INLINE, null),
0123: new Dict("h3", Dict.VERS_ALL,
0124: (Dict.CM_BLOCK | Dict.CM_HEADING),
0125: ParserImpl.INLINE, null),
0126: new Dict("h4", Dict.VERS_ALL,
0127: (Dict.CM_BLOCK | Dict.CM_HEADING),
0128: ParserImpl.INLINE, null),
0129: new Dict("h5", Dict.VERS_ALL,
0130: (Dict.CM_BLOCK | Dict.CM_HEADING),
0131: ParserImpl.INLINE, null),
0132: new Dict("h6", Dict.VERS_ALL,
0133: (Dict.CM_BLOCK | Dict.CM_HEADING),
0134: ParserImpl.INLINE, null),
0135: new Dict("ul", Dict.VERS_ALL, Dict.CM_BLOCK,
0136: ParserImpl.LIST, null),
0137: new Dict("ol", Dict.VERS_ALL, Dict.CM_BLOCK,
0138: ParserImpl.LIST, null),
0139: new Dict("dl", Dict.VERS_ALL, Dict.CM_BLOCK,
0140: ParserImpl.DEFLIST, null),
0141: new Dict("dir", Dict.VERS_LOOSE,
0142: (Dict.CM_BLOCK | Dict.CM_OBSOLETE),
0143: ParserImpl.LIST, null),
0144: new Dict("menu", Dict.VERS_LOOSE,
0145: (Dict.CM_BLOCK | Dict.CM_OBSOLETE),
0146: ParserImpl.LIST, null),
0147: new Dict("pre", Dict.VERS_ALL, Dict.CM_BLOCK,
0148: ParserImpl.PRE, null),
0149: new Dict("listing", Dict.VERS_ALL,
0150: (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE,
0151: null),
0152: new Dict("xmp", Dict.VERS_ALL,
0153: (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE,
0154: null),
0155: new Dict("plaintext", Dict.VERS_ALL,
0156: (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE,
0157: null),
0158: new Dict("address", Dict.VERS_ALL, Dict.CM_BLOCK,
0159: ParserImpl.BLOCK, null),
0160: new Dict("blockquote", Dict.VERS_ALL, Dict.CM_BLOCK,
0161: ParserImpl.BLOCK, null),
0162: new Dict("form", Dict.VERS_ALL, Dict.CM_BLOCK,
0163: ParserImpl.BLOCK, TagCheckImpl.FORM),
0164: new Dict("isindex", Dict.VERS_LOOSE,
0165: (Dict.CM_BLOCK | Dict.CM_EMPTY), ParserImpl.EMPTY,
0166: null),
0167: new Dict("fieldset",
0168: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0169: Dict.CM_BLOCK, ParserImpl.BLOCK, null),
0170: new Dict("table", Dict.VERS_FROM32, Dict.CM_BLOCK,
0171: ParserImpl.TABLETAG, TagCheckImpl.TABLE),
0172: new Dict("hr", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
0173: (Dict.CM_BLOCK | Dict.CM_EMPTY), ParserImpl.EMPTY,
0174: TagCheckImpl.HR),
0175: new Dict("div", Dict.VERS_FROM32, Dict.CM_BLOCK,
0176: ParserImpl.BLOCK, null),
0177: new Dict("multicol", Dict.VERS_NETSCAPE, Dict.CM_BLOCK,
0178: ParserImpl.BLOCK, null),
0179: new Dict("nosave", Dict.VERS_NETSCAPE, Dict.CM_BLOCK,
0180: ParserImpl.BLOCK, null),
0181: new Dict("layer", Dict.VERS_NETSCAPE, Dict.CM_BLOCK,
0182: ParserImpl.BLOCK, null),
0183: new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE,
0184: ParserImpl.INLINE, null),
0185: new Dict("nolayer", Dict.VERS_NETSCAPE, (Dict.CM_BLOCK
0186: | Dict.CM_INLINE | Dict.CM_MIXED),
0187: ParserImpl.BLOCK, null),
0188: new Dict("align", Dict.VERS_NETSCAPE, Dict.CM_BLOCK,
0189: ParserImpl.BLOCK, null),
0190: new Dict("center", Dict.VERS_LOOSE, Dict.CM_BLOCK,
0191: ParserImpl.BLOCK, null),
0192: new Dict("ins",
0193: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0194: (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
0195: ParserImpl.INLINE, null),
0196: new Dict("del",
0197: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0198: (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
0199: ParserImpl.INLINE, null),
0200: new Dict("li", Dict.VERS_ALL,
0201: (Dict.CM_LIST | Dict.CM_OPT | Dict.CM_NO_INDENT),
0202: ParserImpl.BLOCK, null),
0203: new Dict("dt", Dict.VERS_ALL, (Dict.CM_DEFLIST
0204: | Dict.CM_OPT | Dict.CM_NO_INDENT),
0205: ParserImpl.INLINE, null),
0206: new Dict("dd", Dict.VERS_ALL, (Dict.CM_DEFLIST
0207: | Dict.CM_OPT | Dict.CM_NO_INDENT),
0208: ParserImpl.BLOCK, null),
0209: new Dict("caption", Dict.VERS_FROM32, Dict.CM_TABLE,
0210: ParserImpl.INLINE, TagCheckImpl.CAPTION),
0211: new Dict("colgroup", Dict.VERS_HTML40,
0212: (Dict.CM_TABLE | Dict.CM_OPT), ParserImpl.COLGROUP,
0213: null),
0214: new Dict("col", Dict.VERS_HTML40,
0215: (Dict.CM_TABLE | Dict.CM_EMPTY), ParserImpl.EMPTY,
0216: null),
0217: new Dict("thead",
0218: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0219: (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
0220: ParserImpl.ROWGROUP, null),
0221: new Dict("tfoot",
0222: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0223: (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
0224: ParserImpl.ROWGROUP, null),
0225: new Dict("tbody",
0226: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0227: (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
0228: ParserImpl.ROWGROUP, null),
0229: new Dict("tr", Dict.VERS_FROM32,
0230: (Dict.CM_TABLE | Dict.CM_OPT), ParserImpl.ROW, null),
0231: new Dict("td", Dict.VERS_FROM32,
0232: (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
0233: ParserImpl.BLOCK, TagCheckImpl.TABLECELL),
0234: new Dict("th", Dict.VERS_FROM32,
0235: (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
0236: ParserImpl.BLOCK, TagCheckImpl.TABLECELL),
0237: new Dict("q", Dict.VERS_HTML40, Dict.CM_INLINE,
0238: ParserImpl.INLINE, null),
0239: new Dict("a", Dict.VERS_ALL, Dict.CM_INLINE,
0240: ParserImpl.INLINE, TagCheckImpl.ANCHOR),
0241: new Dict("br", Dict.VERS_ALL,
0242: (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY,
0243: null),
0244: new Dict("img", Dict.VERS_ALL, (Dict.CM_INLINE
0245: | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY,
0246: TagCheckImpl.IMG),
0247: new Dict("object", Dict.VERS_HTML40,
0248: (Dict.CM_OBJECT | Dict.CM_HEAD | Dict.CM_IMG
0249: | Dict.CM_INLINE | Dict.CM_PARAM),
0250: ParserImpl.BLOCK, null),
0251: new Dict("applet", Dict.VERS_LOOSE, (Dict.CM_OBJECT
0252: | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
0253: ParserImpl.BLOCK, null),
0254: new Dict("servlet", Dict.VERS_SUN, (Dict.CM_OBJECT
0255: | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
0256: ParserImpl.BLOCK, null),
0257: new Dict("param", Dict.VERS_FROM32,
0258: (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY,
0259: null),
0260: new Dict("embed", Dict.VERS_NETSCAPE, (Dict.CM_INLINE
0261: | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY,
0262: null),
0263: new Dict("noembed", Dict.VERS_NETSCAPE, Dict.CM_INLINE,
0264: ParserImpl.INLINE, null),
0265: new Dict("iframe", Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE,
0266: ParserImpl.BLOCK, null),
0267: new Dict("frame", Dict.VERS_FRAMESET,
0268: (Dict.CM_FRAMES | Dict.CM_EMPTY), ParserImpl.EMPTY,
0269: null),
0270: new Dict("noframes", Dict.VERS_IFRAME,
0271: (Dict.CM_BLOCK | Dict.CM_FRAMES),
0272: ParserImpl.NOFRAMES, null),
0273: new Dict("noscript",
0274: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0275: (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
0276: ParserImpl.BLOCK, null),
0277: new Dict("b", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
0278: Dict.CM_INLINE, ParserImpl.INLINE, null),
0279: new Dict("i", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
0280: Dict.CM_INLINE, ParserImpl.INLINE, null),
0281: new Dict("u", Dict.VERS_LOOSE, Dict.CM_INLINE,
0282: ParserImpl.INLINE, null),
0283: new Dict("tt", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
0284: Dict.CM_INLINE, ParserImpl.INLINE, null),
0285: new Dict("s", Dict.VERS_LOOSE, Dict.CM_INLINE,
0286: ParserImpl.INLINE, null),
0287: new Dict("strike", Dict.VERS_LOOSE, Dict.CM_INLINE,
0288: ParserImpl.INLINE, null),
0289: new Dict("big",
0290: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0291: Dict.CM_INLINE, ParserImpl.INLINE, null),
0292: new Dict("small",
0293: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0294: Dict.CM_INLINE, ParserImpl.INLINE, null),
0295: new Dict("sub",
0296: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0297: Dict.CM_INLINE, ParserImpl.INLINE, null),
0298: new Dict("sup",
0299: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0300: Dict.CM_INLINE, ParserImpl.INLINE, null),
0301: new Dict("em", Dict.VERS_ALL, Dict.CM_INLINE,
0302: ParserImpl.INLINE, null),
0303: new Dict("strong", Dict.VERS_ALL, Dict.CM_INLINE,
0304: ParserImpl.INLINE, null),
0305: new Dict("dfn", Dict.VERS_ALL, Dict.CM_INLINE,
0306: ParserImpl.INLINE, null),
0307: new Dict("code", Dict.VERS_ALL, Dict.CM_INLINE,
0308: ParserImpl.INLINE, null),
0309: new Dict("samp", Dict.VERS_ALL, Dict.CM_INLINE,
0310: ParserImpl.INLINE, null),
0311: new Dict("kbd", Dict.VERS_ALL, Dict.CM_INLINE,
0312: ParserImpl.INLINE, null),
0313: new Dict("var", Dict.VERS_ALL, Dict.CM_INLINE,
0314: ParserImpl.INLINE, null),
0315: new Dict("cite", Dict.VERS_ALL, Dict.CM_INLINE,
0316: ParserImpl.INLINE, null),
0317: new Dict("abbr", Dict.VERS_HTML40, Dict.CM_INLINE,
0318: ParserImpl.INLINE, null),
0319: new Dict("acronym", Dict.VERS_HTML40, Dict.CM_INLINE,
0320: ParserImpl.INLINE, null),
0321: new Dict("span", Dict.VERS_FROM32, Dict.CM_INLINE,
0322: ParserImpl.INLINE, null),
0323: new Dict("blink", Dict.VERS_PROPRIETARY, Dict.CM_INLINE,
0324: ParserImpl.INLINE, null),
0325: new Dict("nobr", Dict.VERS_PROPRIETARY, Dict.CM_INLINE,
0326: ParserImpl.INLINE, null),
0327: new Dict("wbr", Dict.VERS_PROPRIETARY,
0328: (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY,
0329: null),
0330: new Dict("marquee", Dict.VERS_MICROSOFT,
0331: (Dict.CM_INLINE | Dict.CM_OPT), ParserImpl.INLINE,
0332: null),
0333: new Dict("bgsound", Dict.VERS_MICROSOFT,
0334: (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY,
0335: null),
0336: new Dict("comment", Dict.VERS_MICROSOFT, Dict.CM_INLINE,
0337: ParserImpl.INLINE, null),
0338: new Dict("spacer", Dict.VERS_NETSCAPE,
0339: (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY,
0340: null),
0341: new Dict("keygen", Dict.VERS_NETSCAPE,
0342: (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY,
0343: null),
0344: new Dict("nolayer", Dict.VERS_NETSCAPE, (Dict.CM_BLOCK
0345: | Dict.CM_INLINE | Dict.CM_MIXED),
0346: ParserImpl.BLOCK, null),
0347: new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE,
0348: ParserImpl.INLINE, null),
0349: new Dict("map",
0350: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0351: Dict.CM_INLINE, ParserImpl.BLOCK, TagCheckImpl.MAP),
0352: new Dict("area",
0353: (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
0354: (Dict.CM_BLOCK | Dict.CM_EMPTY), ParserImpl.EMPTY,
0355: TagCheckImpl.AREA),
0356: new Dict("input", Dict.VERS_ALL, (Dict.CM_INLINE
0357: | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY,
0358: null),
0359: new Dict("select", Dict.VERS_ALL,
0360: (Dict.CM_INLINE | Dict.CM_FIELD),
0361: ParserImpl.SELECT, null),
0362: new Dict("option", Dict.VERS_ALL,
0363: (Dict.CM_FIELD | Dict.CM_OPT), ParserImpl.TEXT,
0364: null),
0365: new Dict("optgroup",
0366: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0367: (Dict.CM_FIELD | Dict.CM_OPT), ParserImpl.OPTGROUP,
0368: null),
0369: new Dict("textarea", Dict.VERS_ALL,
0370: (Dict.CM_INLINE | Dict.CM_FIELD), ParserImpl.TEXT,
0371: null),
0372: new Dict("label", Dict.VERS_HTML40, Dict.CM_INLINE,
0373: ParserImpl.INLINE, null),
0374: new Dict("legend",
0375: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0376: Dict.CM_INLINE, ParserImpl.INLINE, null),
0377: new Dict("button",
0378: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0379: Dict.CM_INLINE, ParserImpl.INLINE, null),
0380: new Dict("basefont", Dict.VERS_LOOSE,
0381: (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY,
0382: null),
0383: new Dict("font", Dict.VERS_LOOSE, Dict.CM_INLINE,
0384: ParserImpl.INLINE, null),
0385: new Dict("bdo",
0386: (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
0387: Dict.CM_INLINE, ParserImpl.INLINE, null),
0388: // elements for XHTML 1.1
0389: new Dict("ruby", Dict.VERS_XHTML11, Dict.CM_INLINE,
0390: ParserImpl.INLINE, null),
0391: new Dict("rbc", Dict.VERS_XHTML11, Dict.CM_INLINE,
0392: ParserImpl.INLINE, null),
0393: new Dict("rtc", Dict.VERS_XHTML11, Dict.CM_INLINE,
0394: ParserImpl.INLINE, null),
0395: new Dict("rb", Dict.VERS_XHTML11, Dict.CM_INLINE,
0396: ParserImpl.INLINE, null),
0397: new Dict("rt", Dict.VERS_XHTML11, Dict.CM_INLINE,
0398: ParserImpl.INLINE, null),
0399: new Dict("", Dict.VERS_XHTML11, Dict.CM_INLINE,
0400: ParserImpl.INLINE, null),
0401: new Dict("rp", Dict.VERS_XHTML11, Dict.CM_INLINE,
0402: ParserImpl.INLINE, null),
0403: //
0404: };
0405:
0406: /**
0407: * html tag.
0408: */
0409: protected Dict tagHtml;
0410:
0411: /**
0412: * head tag.
0413: */
0414: protected Dict tagHead;
0415:
0416: /**
0417: * body tag.
0418: */
0419: protected Dict tagBody;
0420:
0421: /**
0422: * frameset tag.
0423: */
0424: protected Dict tagFrameset;
0425:
0426: /**
0427: * frame tag.
0428: */
0429: protected Dict tagFrame;
0430:
0431: /**
0432: * iframe tag.
0433: */
0434: protected Dict tagIframe;
0435:
0436: /**
0437: * noframes tag.
0438: */
0439: protected Dict tagNoframes;
0440:
0441: /**
0442: * meta tag.
0443: */
0444: protected Dict tagMeta;
0445:
0446: /**
0447: * title tag.
0448: */
0449: protected Dict tagTitle;
0450:
0451: /**
0452: * base tag.
0453: */
0454: protected Dict tagBase;
0455:
0456: /**
0457: * hr tag.
0458: */
0459: protected Dict tagHr;
0460:
0461: /**
0462: * pre tag.
0463: */
0464: protected Dict tagPre;
0465:
0466: /**
0467: * listing tag.
0468: */
0469: protected Dict tagListing;
0470:
0471: /**
0472: * h1 tag.
0473: */
0474: protected Dict tagH1;
0475:
0476: /**
0477: * h2 tag.
0478: */
0479: protected Dict tagH2;
0480:
0481: /**
0482: * p tag.
0483: */
0484: protected Dict tagP;
0485:
0486: /**
0487: * ul tag.
0488: */
0489: protected Dict tagUl;
0490:
0491: /**
0492: * ol tag.
0493: */
0494: protected Dict tagOl;
0495:
0496: /**
0497: * dir tag.
0498: */
0499: protected Dict tagDir;
0500:
0501: /**
0502: * li tag.
0503: */
0504: protected Dict tagLi;
0505:
0506: /**
0507: * dt tag.
0508: */
0509: protected Dict tagDt;
0510:
0511: /**
0512: * dd tag.
0513: */
0514: protected Dict tagDd;
0515:
0516: /**
0517: * dl tag.
0518: */
0519: protected Dict tagDl;
0520:
0521: /**
0522: * td tag.
0523: */
0524: protected Dict tagTd;
0525:
0526: /**
0527: * th tag.
0528: */
0529: protected Dict tagTh;
0530:
0531: /**
0532: * tr tag.
0533: */
0534: protected Dict tagTr;
0535:
0536: /**
0537: * col tag.
0538: */
0539: protected Dict tagCol;
0540:
0541: /**
0542: * colgroup tag.
0543: */
0544: protected Dict tagColgroup;
0545:
0546: /**
0547: * br tag.
0548: */
0549: protected Dict tagBr;
0550:
0551: /**
0552: * a tag.
0553: */
0554: protected Dict tagA;
0555:
0556: /**
0557: * link tag.
0558: */
0559: protected Dict tagLink;
0560:
0561: /**
0562: * b tag.
0563: */
0564: protected Dict tagB;
0565:
0566: /**
0567: * i tag.
0568: */
0569: protected Dict tagI;
0570:
0571: /**
0572: * strong tag.
0573: */
0574: protected Dict tagStrong;
0575:
0576: /**
0577: * em tag.
0578: */
0579: protected Dict tagEm;
0580:
0581: /**
0582: * big tag.
0583: */
0584: protected Dict tagBig;
0585:
0586: /**
0587: * small tag.
0588: */
0589: protected Dict tagSmall;
0590:
0591: /**
0592: * param tag.
0593: */
0594: protected Dict tagParam;
0595:
0596: /**
0597: * option tag.
0598: */
0599: protected Dict tagOption;
0600:
0601: /**
0602: * optgroup tag.
0603: */
0604: protected Dict tagOptgroup;
0605:
0606: /**
0607: * img tag.
0608: */
0609: protected Dict tagImg;
0610:
0611: /**
0612: * map tag.
0613: */
0614: protected Dict tagMap;
0615:
0616: /**
0617: * area tag.
0618: */
0619: protected Dict tagArea;
0620:
0621: /**
0622: * nobr tag.
0623: */
0624: protected Dict tagNobr;
0625:
0626: /**
0627: * wbr tag.
0628: */
0629: protected Dict tagWbr;
0630:
0631: /**
0632: * font tag.
0633: */
0634: protected Dict tagFont;
0635:
0636: /**
0637: * spacer tag.
0638: */
0639: protected Dict tagSpacer;
0640:
0641: /**
0642: * layer tag.
0643: */
0644: protected Dict tagLayer;
0645:
0646: /**
0647: * center tag.
0648: */
0649: protected Dict tagCenter;
0650:
0651: /**
0652: * style tag.
0653: */
0654: protected Dict tagStyle;
0655:
0656: /**
0657: * script tag.
0658: */
0659: protected Dict tagScript;
0660:
0661: /**
0662: * noscript tag.
0663: */
0664: protected Dict tagNoscript;
0665:
0666: /**
0667: * table tag.
0668: */
0669: protected Dict tagTable;
0670:
0671: /**
0672: * caption tag.
0673: */
0674: protected Dict tagCaption;
0675:
0676: /**
0677: * form tag.
0678: */
0679: protected Dict tagForm;
0680:
0681: /**
0682: * textarea tag.
0683: */
0684: protected Dict tagTextarea;
0685:
0686: /**
0687: * blockquote tag.
0688: */
0689: protected Dict tagBlockquote;
0690:
0691: /**
0692: * applet tag.
0693: */
0694: protected Dict tagApplet;
0695:
0696: /**
0697: * object tag.
0698: */
0699: protected Dict tagObject;
0700:
0701: /**
0702: * div tag.
0703: */
0704: protected Dict tagDiv;
0705:
0706: /**
0707: * span tag.
0708: */
0709: protected Dict tagSpan;
0710:
0711: /**
0712: * input tag.
0713: */
0714: protected Dict tagInput;
0715:
0716: /**
0717: * tag.
0718: */
0719: protected Dict tagQ;
0720:
0721: /**
0722: * a proprietary tag added by Tidy, along with tag_nobr, tag_wbr.
0723: */
0724: protected Dict tagBlink;
0725:
0726: /**
0727: * anchor/node hash.
0728: */
0729: protected Anchor anchorList;
0730:
0731: /**
0732: * configuration.
0733: */
0734: private Configuration configuration;
0735:
0736: /**
0737: * hashTable containing tags.
0738: */
0739: private Map tagHashtable = new Hashtable();
0740:
0741: /**
0742: * Instantiates a new tag table with known tags.
0743: */
0744: protected TagTable() {
0745: for (int i = 0; i < TAGS.length; i++) {
0746: install(TAGS[i]);
0747: }
0748: tagHtml = lookup("html");
0749: tagHead = lookup("head");
0750: tagBody = lookup("body");
0751: tagFrameset = lookup("frameset");
0752: tagFrame = lookup("frame");
0753: tagIframe = lookup("iframe");
0754: tagNoframes = lookup("noframes");
0755: tagMeta = lookup("meta");
0756: tagTitle = lookup("title");
0757: tagBase = lookup("base");
0758: tagHr = lookup("hr");
0759: tagPre = lookup("pre");
0760: tagListing = lookup("listing");
0761: tagH1 = lookup("h1");
0762: tagH2 = lookup("h2");
0763: tagP = lookup("p");
0764: tagUl = lookup("ul");
0765: tagOl = lookup("ol");
0766: tagDir = lookup("dir");
0767: tagLi = lookup("li");
0768: tagDt = lookup("dt");
0769: tagDd = lookup("dd");
0770: tagDl = lookup("dl");
0771: tagTd = lookup("td");
0772: tagTh = lookup("th");
0773: tagTr = lookup("tr");
0774: tagCol = lookup("col");
0775: tagColgroup = lookup("colgroup");
0776: tagBr = lookup("br");
0777: tagA = lookup("a");
0778: tagLink = lookup("link");
0779: tagB = lookup("b");
0780: tagI = lookup("i");
0781: tagStrong = lookup("strong");
0782: tagEm = lookup("em");
0783: tagBig = lookup("big");
0784: tagSmall = lookup("small");
0785: tagParam = lookup("param");
0786: tagOption = lookup("option");
0787: tagOptgroup = lookup("optgroup");
0788: tagImg = lookup("img");
0789: tagMap = lookup("map");
0790: tagArea = lookup("area");
0791: tagNobr = lookup("nobr");
0792: tagWbr = lookup("wbr");
0793: tagFont = lookup("font");
0794: tagSpacer = lookup("spacer");
0795: tagLayer = lookup("layer");
0796: tagCenter = lookup("center");
0797: tagStyle = lookup("style");
0798: tagScript = lookup("script");
0799: tagNoscript = lookup("noscript");
0800: tagTable = lookup("table");
0801: tagCaption = lookup("caption");
0802: tagForm = lookup("form");
0803: tagTextarea = lookup("textarea");
0804: tagBlockquote = lookup("blockquote");
0805: tagApplet = lookup("applet");
0806: tagObject = lookup("object");
0807: tagDiv = lookup("div");
0808: tagSpan = lookup("span");
0809: tagInput = lookup("input");
0810: tagQ = lookup("q");
0811: tagBlink = lookup("blink");
0812: }
0813:
0814: /**
0815: * Setter for the current configuration instance.
0816: * @param configuration configuration instance
0817: */
0818: public void setConfiguration(Configuration configuration) {
0819: this .configuration = configuration;
0820: }
0821:
0822: /**
0823: * Lookup a tag definition by its name.
0824: * @param name tag name
0825: * @return tag definition (Dict)
0826: */
0827: public Dict lookup(String name) {
0828: return (Dict) tagHashtable.get(name);
0829: }
0830:
0831: /**
0832: * Installs a new tag in the tag table, or modify an existing one.
0833: * @param dict tag definition
0834: * @return installed Dict instance
0835: */
0836: public Dict install(Dict dict) {
0837: Dict d = (Dict) tagHashtable.get(dict.name);
0838: if (d != null) {
0839: d.versions = dict.versions;
0840: d.model |= dict.model;
0841: d.setParser(dict.getParser());
0842: d.setChkattrs(dict.getChkattrs());
0843: return d;
0844: }
0845:
0846: tagHashtable.put(dict.name, dict);
0847: return dict;
0848:
0849: }
0850:
0851: /**
0852: * Finds a tag by name.
0853: * @param node Node to find. If the element is found the tag property of node will be set.
0854: * @return true if the tag is found, false otherwise
0855: */
0856: public boolean findTag(Node node) {
0857: Dict np;
0858:
0859: if (configuration != null && configuration.xmlTags) {
0860: node.tag = XML_TAGS;
0861: return true;
0862: }
0863:
0864: if (node.element != null) {
0865: np = lookup(node.element);
0866: if (np != null) {
0867: node.tag = np;
0868: return true;
0869: }
0870: }
0871:
0872: return false;
0873: }
0874:
0875: /**
0876: * Finds a parser fo the given node.
0877: * @param node Node
0878: * @return parser for the node
0879: */
0880: public Parser findParser(Node node) {
0881: Dict np;
0882:
0883: if (node.element != null) {
0884: np = lookup(node.element);
0885: if (np != null) {
0886: return np.getParser();
0887: }
0888: }
0889:
0890: return null;
0891: }
0892:
0893: /**
0894: * May id or name serve as anchor?
0895: * @param node Node
0896: * @return <code>true</code> if tag can serve as an anchor
0897: */
0898: boolean isAnchorElement(Node node) {
0899: return node.tag == this .tagA || node.tag == this .tagApplet
0900: || node.tag == this .tagForm
0901: || node.tag == this .tagFrame
0902: || node.tag == this .tagIframe
0903: || node.tag == this .tagImg || node.tag == this .tagMap;
0904: }
0905:
0906: /**
0907: * Defines a new tag.
0908: * @param tagType tag type. Can be TAGTYPE_BLOCK | TAGTYPE_EMPTY | TAGTYPE_PRE | TAGTYPE_INLINE
0909: * @param name tag name
0910: */
0911: public void defineTag(short tagType, String name) {
0912: Parser tagParser;
0913: short model;
0914:
0915: switch (tagType) {
0916: case Dict.TAGTYPE_BLOCK:
0917: model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW);
0918: tagParser = ParserImpl.BLOCK;
0919: break;
0920:
0921: case Dict.TAGTYPE_EMPTY:
0922: model = (short) (Dict.CM_EMPTY | Dict.CM_NO_INDENT | Dict.CM_NEW);
0923: tagParser = ParserImpl.BLOCK;
0924: break;
0925:
0926: case Dict.TAGTYPE_PRE:
0927: model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW);
0928: tagParser = ParserImpl.PRE;
0929: break;
0930:
0931: case Dict.TAGTYPE_INLINE:
0932: default:
0933: // default to inline tag
0934: model = (short) (Dict.CM_INLINE | Dict.CM_NO_INDENT | Dict.CM_NEW);
0935: tagParser = ParserImpl.INLINE;
0936: break;
0937: }
0938:
0939: install(new Dict(name, Dict.VERS_PROPRIETARY, model, tagParser,
0940: null));
0941: }
0942:
0943: /**
0944: * return a List containing all the user-defined tag names.
0945: * @param tagType one of Dict.TAGTYPE_EMPTY | Dict.TAGTYPE_INLINE | Dict.TAGTYPE_BLOCK | Dict.TAGTYPE_PRE
0946: * @return List containing all the user-defined tag names
0947: */
0948: List findAllDefinedTag(short tagType) {
0949: List tagNames = new ArrayList();
0950:
0951: Iterator iterator = tagHashtable.values().iterator();
0952: while (iterator.hasNext()) {
0953: Dict curDictEntry = (Dict) iterator.next();
0954:
0955: if (curDictEntry != null) {
0956: switch (tagType) {
0957: // defined tags can be empty + inline
0958: case Dict.TAGTYPE_EMPTY:
0959: if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
0960: && ((curDictEntry.model & Dict.CM_EMPTY) == Dict.CM_EMPTY)
0961: && // (curDictEntry.parser == ParseBlock) &&
0962: (curDictEntry != tagWbr)) {
0963: tagNames.add(curDictEntry.name);
0964: }
0965: break;
0966:
0967: // defined tags can be empty + inline
0968: case Dict.TAGTYPE_INLINE:
0969: if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
0970: && ((curDictEntry.model & Dict.CM_INLINE) == Dict.CM_INLINE)
0971: && // (curDictEntry.parser == ParseInline) &&
0972: (curDictEntry != tagBlink)
0973: && (curDictEntry != tagNobr)
0974: && (curDictEntry != tagWbr)) {
0975: tagNames.add(curDictEntry.name);
0976: }
0977: break;
0978:
0979: // defined tags can be empty + block
0980: case Dict.TAGTYPE_BLOCK:
0981: if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
0982: && ((curDictEntry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK)
0983: && (curDictEntry.getParser() == ParserImpl.BLOCK)) {
0984: tagNames.add(curDictEntry.name);
0985: }
0986: break;
0987:
0988: case Dict.TAGTYPE_PRE:
0989: if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
0990: && ((curDictEntry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK)
0991: && (curDictEntry.getParser() == ParserImpl.PRE)) {
0992: tagNames.add(curDictEntry.name);
0993: }
0994: break;
0995: }
0996: }
0997: }
0998:
0999: return tagNames;
1000: }
1001:
1002: /**
1003: * Free node's attributes.
1004: * @param node Node
1005: */
1006: public void freeAttrs(Node node) {
1007: while (node.attributes != null) {
1008: AttVal av = node.attributes;
1009: if ("id".equalsIgnoreCase(av.attribute)
1010: || "name".equalsIgnoreCase(av.attribute)
1011: && isAnchorElement(node)) {
1012: removeAnchorByNode(node);
1013: }
1014:
1015: node.attributes = av.next;
1016: }
1017: }
1018:
1019: /**
1020: * Removes anchor for specific node.
1021: * @param node Node
1022: */
1023: void removeAnchorByNode(Node node) {
1024: Anchor delme = null;
1025: Anchor found = null;
1026: Anchor prev = null;
1027: Anchor next = null;
1028:
1029: for (found = anchorList; found != null; found = found.next) {
1030: next = found.next;
1031:
1032: if (found.node == node) {
1033: if (prev != null) {
1034: prev.next = next;
1035: } else {
1036: anchorList = next;
1037: }
1038:
1039: delme = found;
1040: } else {
1041: prev = found;
1042: }
1043: }
1044: if (delme != null) {
1045: delme = null; // freeAnchor
1046: }
1047: }
1048:
1049: /**
1050: * Initialize a new anchor.
1051: * @return a new anchor element
1052: */
1053: Anchor newAnchor() {
1054: Anchor a = new Anchor();
1055: return a;
1056: }
1057:
1058: /**
1059: * Adds a new anchor to namespace.
1060: * @param name anchor name
1061: * @param node destination for this anchor
1062: * @return Anchor
1063: */
1064: Anchor addAnchor(String name, Node node) {
1065: Anchor a = newAnchor();
1066:
1067: a.name = name;
1068: a.node = node;
1069:
1070: if (anchorList == null) {
1071: anchorList = a;
1072: } else {
1073: Anchor here = anchorList;
1074:
1075: while (here.next != null) {
1076: here = here.next;
1077: }
1078: here.next = a;
1079: }
1080:
1081: return anchorList;
1082: }
1083:
1084: /**
1085: * Return node associated with anchor.
1086: * @param name anchor name
1087: * @return node associated with anchor
1088: */
1089: Node getNodeByAnchor(String name) {
1090: Anchor found;
1091:
1092: for (found = anchorList; found != null; found = found.next) {
1093: if (name.equalsIgnoreCase(found.name)) {
1094: break;
1095: }
1096: }
1097:
1098: if (found != null) {
1099: return found.node;
1100: }
1101:
1102: return null;
1103: }
1104:
1105: /**
1106: * free all anchors.
1107: */
1108: void freeAnchors() {
1109: anchorList = null;
1110: }
1111:
1112: }
|