0001: /*
0002: * @(#)Lexer.java 1.11 2000/08/16
0003: *
0004: */
0005:
0006: package org.w3c.tidy;
0007:
0008: /**
0009: *
0010: * Lexer for html parser
0011: *
0012: * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
0013: * See Tidy.java for the copyright notice.
0014: * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
0015: * HTML Tidy Release 4 Aug 2000</a>
0016: *
0017: * @author Dave Raggett <dsr@w3.org>
0018: * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
0019: * @version 1.0, 1999/05/22
0020: * @version 1.0.1, 1999/05/29
0021: * @version 1.1, 1999/06/18 Java Bean
0022: * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
0023: * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
0024: * @version 1.4, 1999/09/04 DOM support
0025: * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
0026: * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
0027: * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
0028: * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
0029: * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
0030: * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
0031: * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
0032: */
0033:
0034: /*
0035: Given a file stream fp it returns a sequence of tokens.
0036:
0037: GetToken(fp) gets the next token
0038: UngetToken(fp) provides one level undo
0039:
0040: The tags include an attribute list:
0041:
0042: - linked list of attribute/value nodes
0043: - each node has 2 null-terminated strings.
0044: - entities are replaced in attribute values
0045:
0046: White space is compacted if not in preformatted mode:
0047: leading white space is discarded and subsequent
0048: white space sequences are compacted to single
0049: space chars.
0050:
0051: If XmlTags is no then tag names and attribute
0052: names are folded to lower case.
0053:
0054: Not yet done:
0055: - Doctype subset and marked sections
0056: */
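
/*
For example (illustrative): outside preformatted mode the input
"   Hello,\n    world!" comes back as the single text token
"Hello, world!".
*/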
0057:
0058: import java.io.PrintWriter;
0059: import java.util.Stack;
0060: import java.util.Vector;
0061:
0062: public class Lexer {
0063:
0064: public StreamIn in; /* file stream */
0065: public PrintWriter errout; /* error output stream */
0066: public short badAccess; /* for accessibility errors */
0067: public short badLayout; /* for bad style errors */
0068: public short badChars; /* for bad char encodings */
0069: public short badForm; /* for mismatched/mispositioned form tags */
0070: public short warnings; /* count of warnings in this document */
0071: public short errors; /* count of errors */
0072: public int lines; /* lines seen */
0073: public int columns; /* at start of current token */
0074: public boolean waswhite; /* used to collapse contiguous white space */
0075: public boolean pushed; /* true after token has been pushed back */
0076: public boolean insertspace; /* when space is moved after end tag */
0077: public boolean excludeBlocks; /* Netscape compatibility */
0078: public boolean exiled; /* true if moved out of table */
0079: public boolean isvoyager; /* true if xmlns attribute on html element */
0080: public short versions; /* bit vector of HTML versions */
0081: public int doctype; /* version as given by doctype (if any) */
0082: public boolean badDoctype; /* e.g. if html or PUBLIC is missing */
0083: public int txtstart; /* start of current node */
0084: public int txtend; /* end of current node */
0085: public short state; /* state of lexer's finite state machine */
0086: public Node token;
0087:
0088: /*
0089: lexer character buffer
0090:
0091: parse tree nodes refer to spans in this buffer,
0092: which contains the concatenated text
0093: content of all of the elements.
0094:
0095: lexsize must be reset for each file.
0096: */
0097: public byte[] lexbuf; /* byte buffer of UTF-8 chars */
0098: public int lexlength; /* allocated */
0099: public int lexsize; /* used */
0100:
0101: /* Inline stack for compatibility with Mosaic */
0102: public Node inode; /* for deferring text node */
0103: public int insert; /* for inferring inline tags */
0104: public Stack istack;
0105: public int istackbase; /* start of frame */
0106:
0107: public Style styles; /* used for cleaning up presentation markup */
0108:
0109: public Configuration configuration;
0110: protected int seenBodyEndTag; /* used by parser */
0111: private Vector nodeList;
0112:
0113: public Lexer(StreamIn in, Configuration configuration) {
0114: this .in = in;
0115: this .lines = 1;
0116: this .columns = 1;
0117: this .state = LEX_CONTENT;
0118: this .badAccess = 0;
0119: this .badLayout = 0;
0120: this .badChars = 0;
0121: this .badForm = 0;
0122: this .warnings = 0;
0123: this .errors = 0;
0124: this .waswhite = false;
0125: this .pushed = false;
0126: this .insertspace = false;
0127: this .exiled = false;
0128: this .isvoyager = false;
0129: this .versions = Dict.VERS_EVERYTHING;
0130: this .doctype = Dict.VERS_UNKNOWN;
0131: this .badDoctype = false;
0132: this .txtstart = 0;
0133: this .txtend = 0;
0134: this .token = null;
0135: this .lexbuf = null;
0136: this .lexlength = 0;
0137: this .lexsize = 0;
0138: this .inode = null;
0139: this .insert = -1;
0140: this .istack = new Stack();
0141: this .istackbase = 0;
0142: this .styles = null;
0143: this .configuration = configuration;
0144: this .seenBodyEndTag = 0;
0145: this .nodeList = new Vector();
0146: }
0147:
0148: public Node newNode() {
0149: Node node = new Node();
0150: nodeList.addElement(node);
0151: return node;
0152: }
0153:
0154: public Node newNode(short type, byte[] textarray, int start, int end) {
0155: Node node = new Node(type, textarray, start, end);
0156: nodeList.addElement(node);
0157: return node;
0158: }
0159:
0160: public Node newNode(short type, byte[] textarray, int start,
0161: int end, String element) {
0162: Node node = new Node(type, textarray, start, end, element,
0163: configuration.tt);
0164: nodeList.addElement(node);
0165: return node;
0166: }
0167:
0168: public Node cloneNode(Node node) {
0169: Node cnode = (Node) node.clone();
0170: nodeList.addElement(cnode);
0171: for (AttVal att = cnode.attributes; att != null; att = att.next) {
0172: if (att.asp != null)
0173: nodeList.addElement(att.asp);
0174: if (att.php != null)
0175: nodeList.addElement(att.php);
0176: }
0177: return cnode;
0178: }
0179:
0180: public AttVal cloneAttributes(AttVal attrs) {
0181: AttVal cattrs = (AttVal) attrs.clone();
0182: for (AttVal att = cattrs; att != null; att = att.next) {
0183: if (att.asp != null)
0184: nodeList.addElement(att.asp);
0185: if (att.php != null)
0186: nodeList.addElement(att.php);
0187: }
0188: return cattrs;
0189: }
0190:
0191: protected void updateNodeTextArrays(byte[] oldtextarray,
0192: byte[] newtextarray) {
0193: Node node;
0194: for (int i = 0; i < nodeList.size(); i++) {
0195: node = (Node) (nodeList.elementAt(i));
0196: if (node.textarray == oldtextarray)
0197: node.textarray = newtextarray;
0198: }
0199: }
0200:
0201: /* used for creating preformatted text from Word2000 */
0202: public Node newLineNode() {
0203: Node node = newNode();
0204:
0205: node.textarray = this .lexbuf;
0206: node.start = this .lexsize;
0207: addCharToLexer((int) '\n');
0208: node.end = this .lexsize;
0209: return node;
0210: }
0211:
0212: // Should always be able to convert to/from UTF-8, so encoding exceptions
0213: // are converted to an Error to avoid adding throws declarations in
0214: // lots of methods.
0215:
0216: public static byte[] getBytes(String str) {
0217: try {
0218: return str.getBytes("UTF8");
0219: } catch (java.io.UnsupportedEncodingException e) {
0220: throw new Error("string to UTF-8 conversion failed: "
0221: + e.getMessage());
0222: }
0223: }
0224:
0225: public static String getString(byte[] bytes, int offset, int length) {
0226: try {
0227: return new String(bytes, offset, length, "UTF8");
0228: } catch (java.io.UnsupportedEncodingException e) {
0229: throw new Error("UTF-8 to string conversion failed: "
0230: + e.getMessage());
0231: }
0232: }
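
/*
Round-trip sketch (illustrative): getBytes("café") yields five bytes,
since 'é' (U+00E9) occupies two bytes in UTF-8, and
getString(getBytes("café"), 0, 5) returns "café" again.
*/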
0233:
0234: public boolean endOfInput() {
0235: return this .in.isEndOfStream();
0236: }
0237:
0238: public void addByte(int c) {
0239: if (this .lexsize + 1 >= this .lexlength) {
0240: while (this .lexsize + 1 >= this .lexlength) {
0241: if (this .lexlength == 0)
0242: this .lexlength = 8192;
0243: else
0244: this .lexlength = this .lexlength * 2;
0245: }
0246:
0247: byte[] temp = this .lexbuf;
0248: this .lexbuf = new byte[this .lexlength];
0249: if (temp != null) {
0250: System.arraycopy(temp, 0, this .lexbuf, 0, temp.length);
0251: updateNodeTextArrays(temp, this .lexbuf);
0252: }
0253: }
0254:
0255: this .lexbuf[this .lexsize++] = (byte) c;
0256: this .lexbuf[this .lexsize] = (byte) '\0'; /* debug */
0257: }
0258:
0259: public void changeChar(byte c) {
0260: if (this .lexsize > 0) {
0261: this .lexbuf[this .lexsize - 1] = c;
0262: }
0263: }
0264:
0265: /* store char c as UTF-8 encoded byte stream */
0266: public void addCharToLexer(int c) {
0267: if (c < 128)
0268: addByte(c);
0269: else if (c <= 0x7FF) {
0270: addByte(0xC0 | (c >> 6));
0271: addByte(0x80 | (c & 0x3F));
0272: } else if (c <= 0xFFFF) {
0273: addByte(0xE0 | (c >> 12));
0274: addByte(0x80 | ((c >> 6) & 0x3F));
0275: addByte(0x80 | (c & 0x3F));
0276: } else if (c <= 0x1FFFFF) {
0277: addByte(0xF0 | (c >> 18));
0278: addByte(0x80 | ((c >> 12) & 0x3F));
0279: addByte(0x80 | ((c >> 6) & 0x3F));
0280: addByte(0x80 | (c & 0x3F));
0281: } else {
0282: addByte(0xF8 | (c >> 24));
0283: addByte(0x80 | ((c >> 18) & 0x3F));
0284: addByte(0x80 | ((c >> 12) & 0x3F));
0285: addByte(0x80 | ((c >> 6) & 0x3F));
0286: addByte(0x80 | (c & 0x3F));
0287: }
0288: }
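
/*
Worked examples (illustrative): U+00A9 (the copyright sign, 0xA9) is
stored as the two bytes 0xC2 0xA9, and U+20AC (the euro sign) as the
three bytes 0xE2 0x82 0xAC, following the branches above.
*/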
0289:
0290: public void addStringToLexer(String str) {
0291: for (int i = 0; i < str.length(); i++) {
0292: addCharToLexer((int) str.charAt(i));
0293: }
0294: }
0295:
0296: /*
0297: No longer attempts to insert missing ';' for unknown
0298: entities unless one was present already, since this
0299: gives unexpected results.
0300:
0301: For example: <a href="something.htm?foo&bar&fred">
0302: was tidied to: <a href="something.htm?foo&bar;&fred;">
0303: rather than: <a href="something.htm?foo&bar&fred">
0304:
0305: My thanks to Maurice Buxton for spotting this.
0306: */
0307: public void parseEntity(short mode) {
0308: short map;
0309: int start;
0310: boolean first = true;
0311: boolean semicolon = false;
0312: boolean numeric = false;
0313: int c, ch, startcol;
0314: String str;
0315:
0316: start = this .lexsize - 1; /* to start at "&" */
0317: startcol = this .in.curcol - 1;
0318:
0319: while (true) {
0320: c = this .in.readChar();
0321: if (c == StreamIn.EndOfStream)
0322: break;
0323: if (c == ';') {
0324: semicolon = true;
0325: break;
0326: }
0327:
0328: if (first && c == '#') {
0329: addCharToLexer(c);
0330: first = false;
0331: numeric = true;
0332: continue;
0333: }
0334:
0335: first = false;
0336: map = MAP((char) c);
0337:
0338: /* AQ: Added flag for numeric entities so that numeric entities
0339: with missing semi-colons are recognized.
0340: Eg. "&#114" (missing the closing ';') is still recognized as "r".
0341: */
0342: if (numeric && ((c == 'x') || ((map & DIGIT) != 0))) {
0343: addCharToLexer(c);
0344: continue;
0345: }
0346: if (!numeric && ((map & NAMECHAR) != 0)) {
0347: addCharToLexer(c);
0348: continue;
0349: }
0350:
0351: /* otherwise put it back */
0352:
0353: this .in.ungetChar(c);
0354: break;
0355: }
0356:
0357: str = getString(this .lexbuf, start, this .lexsize - start);
0358: ch = EntityTable.getDefaultEntityTable().entityCode(str);
0359:
0360: /* deal with unrecognized entities */
0361: if (ch <= 0) {
0362: /* set error position just before offending character */
0363: this .lines = this .in.curline;
0364: this .columns = startcol;
0365:
0366: if (this .lexsize > start + 1) {
0367: Report
0368: .entityError(this , Report.UNKNOWN_ENTITY, str,
0369: ch);
0370:
0371: if (semicolon)
0372: addCharToLexer(';');
0373: } else /* naked & */
0374: {
0375: Report.entityError(this , Report.UNESCAPED_AMPERSAND,
0376: str, ch);
0377: }
0378: } else {
0379: if (c != ';') /* issue warning if not terminated by ';' */
0380: {
0381: /* set error position just before offending character */
0382: this .lines = this .in.curline;
0383: this .columns = startcol;
0384: Report.entityError(this , Report.MISSING_SEMICOLON, str,
0385: c);
0386: }
0387:
0388: this .lexsize = start;
0389:
0390: if (ch == 160 && (mode & Preformatted) != 0)
0391: ch = ' ';
0392:
0393: addCharToLexer(ch);
0394:
0395: if (ch == '&' && !this .configuration.QuoteAmpersand) {
0396: addCharToLexer('a');
0397: addCharToLexer('m');
0398: addCharToLexer('p');
0399: addCharToLexer(';');
0400: }
0401: }
0402: }
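
/*
Behaviour sketch (illustrative): "&lt;" is replaced by '<' in the lexer
buffer; "&#169" without the closing ';' is still replaced by the
copyright sign but triggers a MISSING_SEMICOLON warning; an unrecognized
name such as "&foo;" is left as typed and reported as UNKNOWN_ENTITY;
a bare "&" is reported as UNESCAPED_AMPERSAND.
*/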
0403:
0404: public char parseTagName() {
0405: short map;
0406: int c;
0407:
0408: /* fold case of first char in buffer */
0409:
0410: c = this .lexbuf[this .txtstart];
0411: map = MAP((char) c);
0412:
0413: if (!this .configuration.XmlTags && (map & UPPERCASE) != 0) {
0414: c += (int) ((int) 'a' - (int) 'A');
0415: this .lexbuf[this .txtstart] = (byte) c;
0416: }
0417:
0418: while (true) {
0419: c = this .in.readChar();
0420: if (c == StreamIn.EndOfStream)
0421: break;
0422: map = MAP((char) c);
0423:
0424: if ((map & NAMECHAR) == 0)
0425: break;
0426:
0427: /* fold case of subsequent chars */
0428:
0429: if (!this .configuration.XmlTags && (map & UPPERCASE) != 0)
0430: c += (int) ((int) 'a' - (int) 'A');
0431:
0432: addCharToLexer(c);
0433: }
0434:
0435: this .txtend = this .lexsize;
0436: return (char) c;
0437: }
0438:
0439: public void addStringLiteral(String str) {
0440: for (int i = 0; i < str.length(); i++) {
0441: addCharToLexer((int) str.charAt(i));
0442: }
0443: }
0444:
0445: /* choose what version to use for new doctype */
0446: public short HTMLVersion() {
0447: short versions;
0448:
0449: versions = this .versions;
0450:
0451: if ((versions & Dict.VERS_HTML20) != 0)
0452: return Dict.VERS_HTML20;
0453:
0454: if ((versions & Dict.VERS_HTML32) != 0)
0455: return Dict.VERS_HTML32;
0456:
0457: if ((versions & Dict.VERS_HTML40_STRICT) != 0)
0458: return Dict.VERS_HTML40_STRICT;
0459:
0460: if ((versions & Dict.VERS_HTML40_LOOSE) != 0)
0461: return Dict.VERS_HTML40_LOOSE;
0462:
0463: if ((versions & Dict.VERS_FRAMES) != 0)
0464: return Dict.VERS_FRAMES;
0465:
0466: return Dict.VERS_UNKNOWN;
0467: }
0468:
0469: public String HTMLVersionName() {
0470: short guessed;
0471: int j;
0472:
0473: guessed = apparentVersion();
0474:
0475: for (j = 0; j < W3CVersion.length; ++j) {
0476: if (guessed == W3CVersion[j].code) {
0477: if (this .isvoyager)
0478: return W3CVersion[j].voyagerName;
0479:
0480: return W3CVersion[j].name;
0481: }
0482: }
0483:
0484: return null;
0485: }
0486:
0487: /* add meta element for Tidy */
0488: public boolean addGenerator(Node root) {
0489: AttVal attval;
0490: Node node;
0491: Node head = root.findHEAD(configuration.tt);
0492:
0493: if (head != null) {
0494: for (node = head.content; node != null; node = node.next) {
0495: if (node.tag == configuration.tt.tagMeta) {
0496: attval = node.getAttrByName("name");
0497:
0498: if (attval != null
0499: && attval.value != null
0500: && Lexer.wstrcasecmp(attval.value,
0501: "generator") == 0) {
0502: attval = node.getAttrByName("content");
0503:
0504: if (attval != null
0505: && attval.value != null
0506: && attval.value.length() >= 9
0507: && Lexer.wstrcasecmp(attval.value
0508: .substring(0, 9), "HTML Tidy") == 0) {
0509: return false;
0510: }
0511: }
0512: }
0513: }
0514:
0515: node = this .inferredTag("meta");
0516: node.addAttribute("content", "HTML Tidy, see www.w3.org");
0517: node.addAttribute("name", "generator");
0518: Node.insertNodeAtStart(head, node);
0519: return true;
0520: }
0521:
0522: return false;
0523: }
0524:
0525: /* return true if substring s is in p and isn't all in upper case */
0526: /* this is used to check the case of SYSTEM, PUBLIC, DTD and EN */
0527: /* len is how many chars to check in p */
0528: private static boolean findBadSubString(String s, String p, int len) {
0529: int n = s.length();
0530: int i = 0;
0531: String ps;
0532:
0533: while (n < len) {
0534: ps = p.substring(i, i + n);
0535: if (wstrcasecmp(s, ps) == 0)
0536: return (!ps.equals(s.substring(0, n)));
0537:
0538: ++i;
0539: --len;
0540: }
0541:
0542: return false;
0543: }
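
/*
Example (illustrative): for a doctype written as
    <!doctype html public "-//w3c//dtd html 4.0//en">
findBadSubString("PUBLIC", s, len) returns true, since "public" matches
the keyword case-insensitively but is not upper case, so
checkDocTypeKeyWords() below returns false and findGivenVersion()
reports DTYPE_NOT_UPPER_CASE.
*/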
0544:
0545: public boolean checkDocTypeKeyWords(Node doctype) {
0546: int len = doctype.end - doctype.start;
0547: String s = getString(this .lexbuf, doctype.start, len);
0548:
0549: return !(findBadSubString("SYSTEM", s, len)
0550: || findBadSubString("PUBLIC", s, len)
0551: || findBadSubString("//DTD", s, len)
0552: || findBadSubString("//W3C", s, len) || findBadSubString(
0553: "//EN", s, len));
0554: }
0555:
0556: /* examine <!DOCTYPE> to identify version */
0557: public short findGivenVersion(Node doctype) {
0558: String p, s;
0559: int i, j;
0560: int len;
0561: String str1;
0562: String str2;
0563:
0564: /* if root tag for doctype isn't html give up now */
0565: str1 = getString(this .lexbuf, doctype.start, 5);
0566: if (wstrcasecmp(str1, "html ") != 0)
0567: return 0;
0568:
0569: if (!checkDocTypeKeyWords(doctype))
0570: Report.warning(this , doctype, null,
0571: Report.DTYPE_NOT_UPPER_CASE);
0572:
0573: /* give up if all we are given is the system id for the doctype */
0574: str1 = getString(this .lexbuf, doctype.start + 5, 7);
0575: if (wstrcasecmp(str1, "SYSTEM ") == 0) {
0576: /* but at least ensure the case is correct */
0577: if (!str1.substring(0, 6).equals("SYSTEM"))
0578: System.arraycopy(getBytes("SYSTEM"), 0, this .lexbuf,
0579: doctype.start + 5, 6);
0580: return 0; /* unrecognized */
0581: }
0582:
0583: if (wstrcasecmp(str1, "PUBLIC ") == 0) {
0584: if (!str1.substring(0, 6).equals("PUBLIC"))
0585: System.arraycopy(getBytes("PUBLIC "), 0, this .lexbuf,
0586: doctype.start + 5, 6);
0587: } else
0588: this .badDoctype = true;
0589:
0590: for (i = doctype.start; i < doctype.end; ++i) {
0591: if (this .lexbuf[i] == (byte) '"') {
0592: str1 = getString(this .lexbuf, i + 1, 12);
0593: str2 = getString(this .lexbuf, i + 1, 13);
0594: if (str1.equals("-//W3C//DTD ")) {
0595: /* compute length of identifier e.g. "HTML 4.0 Transitional" */
0596: for (j = i + 13; j < doctype.end
0597: && this .lexbuf[j] != (byte) '/'; ++j)
0598: ;
0599: len = j - i - 13;
0600: p = getString(this .lexbuf, i + 13, len);
0601:
0602: for (j = 1; j < W3CVersion.length; ++j) {
0603: s = W3CVersion[j].name;
0604: if (len == s.length() && s.equals(p))
0605: return W3CVersion[j].code;
0606: }
0607:
0608: /* else unrecognized version */
0609: } else if (str2.equals("-//IETF//DTD ")) {
0610: /* compute length of identifier e.g. "HTML 2.0" */
0611: for (j = i + 14; j < doctype.end
0612: && this .lexbuf[j] != (byte) '/'; ++j)
0613: ;
0614: len = j - i - 14;
0615:
0616: p = getString(this .lexbuf, i + 14, len);
0617: s = W3CVersion[0].name;
0618: if (len == s.length() && s.equals(p))
0619: return W3CVersion[0].code;
0620:
0621: /* else unrecognized version */
0622: }
0623: break;
0624: }
0625: }
0626:
0627: return 0;
0628: }
0629:
0630: public void fixHTMLNameSpace(Node root, String profile) {
0631: Node node;
0632: AttVal prev, attr;
0633:
0634: for (node = root.content; node != null
0635: && node.tag != configuration.tt.tagHtml; node = node.next)
0636: ;
0637:
0638: if (node != null) {
0639: prev = null;
0640:
0641: for (attr = node.attributes; attr != null; attr = attr.next) {
0642: if (attr.attribute.equals("xmlns"))
0643: break;
0644:
0645: prev = attr;
0646: }
0647:
0648: if (attr != null) {
0649: if (!attr.value.equals(profile)) {
0650: Report.warning(this , node, null,
0651: Report.INCONSISTENT_NAMESPACE);
0652: attr.value = profile;
0653: }
0654: } else {
0655: attr = new AttVal(node.attributes, null, (int) '"',
0656: "xmlns", profile);
0657: attr.dict = AttributeTable.getDefaultAttributeTable()
0658: .findAttribute(attr);
0659: node.attributes = attr;
0660: }
0661: }
0662: }
0663:
0664: public boolean setXHTMLDocType(Node root) {
0665: String fpi = " ";
0666: String sysid = "";
0667: String namespace = XHTML_NAMESPACE;
0668: Node doctype;
0669:
0670: doctype = root.findDocType();
0671:
0672: if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) {
0673: if (doctype != null)
0674: Node.discardElement(doctype);
0675: return true;
0676: }
0677:
0678: if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) {
0679: /* see what flavor of XHTML this document matches */
0680: if ((this .versions & Dict.VERS_HTML40_STRICT) != 0) { /* use XHTML strict */
0681: fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
0682: sysid = voyager_strict;
0683: } else if ((this .versions & Dict.VERS_LOOSE) != 0) {
0684: fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
0685: sysid = voyager_loose;
0686: } else if ((this .versions & Dict.VERS_FRAMES) != 0) { /* use XHTML frames */
0687: fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN";
0688: sysid = voyager_frameset;
0689: } else /* let's assume XHTML transitional */
0690: {
0691: fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
0692: sysid = voyager_loose;
0693: }
0694: } else if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) {
0695: fpi = "-//W3C//DTD XHTML 1.0 Strict//EN";
0696: sysid = voyager_strict;
0697: } else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) {
0698: fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN";
0699: sysid = voyager_loose;
0700: }
0701:
0702: fixHTMLNameSpace(root, namespace);
0703:
0704: if (doctype == null) {
0705: doctype = newNode(Node.DocTypeTag, this .lexbuf, 0, 0);
0706: doctype.next = root.content;
0707: doctype.parent = root;
0708: doctype.prev = null;
0709: root.content = doctype;
0710: }
0711:
0712: if (configuration.docTypeMode == Configuration.DOCTYPE_USER
0713: && configuration.docTypeStr != null) {
0714: fpi = configuration.docTypeStr;
0715: sysid = "";
0716: }
0717:
0718: this .txtstart = this .lexsize;
0719: this .txtend = this .lexsize;
0720:
0721: /* add public identifier */
0722: addStringLiteral("html PUBLIC ");
0723:
0724: /* check if the fpi is quoted or not */
0725: if (fpi.charAt(0) == '"')
0726: addStringLiteral(fpi);
0727: else {
0728: addStringLiteral("\"");
0729: addStringLiteral(fpi);
0730: addStringLiteral("\"");
0731: }
0732:
0733: if (sysid.length() + 6 >= this .configuration.wraplen)
0734: addStringLiteral("\n\"");
0735: else
0736: addStringLiteral("\n \"");
0737:
0738: /* add system identifier */
0739: addStringLiteral(sysid);
0740: addStringLiteral("\"");
0741:
0742: this .txtend = this .lexsize;
0743:
0744: doctype.start = this .txtstart;
0745: doctype.end = this .txtend;
0746:
0747: return false;
0748: }
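
/*
For illustration: with doctype-mode "strict" the DocType node's text
becomes
    html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
followed on the next line by the quoted voyager_strict system
identifier.
*/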
0749:
0750: public short apparentVersion() {
0751: switch (this .doctype) {
0752: case Dict.VERS_UNKNOWN:
0753: return HTMLVersion();
0754:
0755: case Dict.VERS_HTML20:
0756: if ((this .versions & Dict.VERS_HTML20) != 0)
0757: return Dict.VERS_HTML20;
0758:
0759: break;
0760:
0761: case Dict.VERS_HTML32:
0762: if ((this .versions & Dict.VERS_HTML32) != 0)
0763: return Dict.VERS_HTML32;
0764:
0765: break; /* to replace old version by new */
0766:
0767: case Dict.VERS_HTML40_STRICT:
0768: if ((this .versions & Dict.VERS_HTML40_STRICT) != 0)
0769: return Dict.VERS_HTML40_STRICT;
0770:
0771: break;
0772:
0773: case Dict.VERS_HTML40_LOOSE:
0774: if ((this .versions & Dict.VERS_HTML40_LOOSE) != 0)
0775: return Dict.VERS_HTML40_LOOSE;
0776:
0777: break; /* to replace old version by new */
0778:
0779: case Dict.VERS_FRAMES:
0780: if ((this .versions & Dict.VERS_FRAMES) != 0)
0781: return Dict.VERS_FRAMES;
0782:
0783: break;
0784: }
0785:
0786: Report.warning(this , null, null, Report.INCONSISTENT_VERSION);
0787: return this .HTMLVersion();
0788: }
0789:
0790: /* fixup doctype if missing */
0791: public boolean fixDocType(Node root) {
0792: Node doctype;
0793: int guessed = Dict.VERS_HTML40_STRICT, i;
0794:
0795: if (this .badDoctype)
0796: Report.warning(this , null, null, Report.MALFORMED_DOCTYPE);
0797:
0798: if (configuration.XmlOut)
0799: return true;
0800:
0801: doctype = root.findDocType();
0802:
0803: if (configuration.docTypeMode == Configuration.DOCTYPE_OMIT) {
0804: if (doctype != null)
0805: Node.discardElement(doctype);
0806: return true;
0807: }
0808:
0809: if (configuration.docTypeMode == Configuration.DOCTYPE_STRICT) {
0810: Node.discardElement(doctype);
0811: doctype = null;
0812: guessed = Dict.VERS_HTML40_STRICT;
0813: } else if (configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) {
0814: Node.discardElement(doctype);
0815: doctype = null;
0816: guessed = Dict.VERS_HTML40_LOOSE;
0817: } else if (configuration.docTypeMode == Configuration.DOCTYPE_AUTO) {
0818: if (doctype != null) {
0819: if (this .doctype == Dict.VERS_UNKNOWN)
0820: return false;
0821:
0822: switch (this .doctype) {
0823: case Dict.VERS_UNKNOWN:
0824: return false;
0825:
0826: case Dict.VERS_HTML20:
0827: if ((this .versions & Dict.VERS_HTML20) != 0)
0828: return true;
0829:
0830: break; /* to replace old version by new */
0831:
0832: case Dict.VERS_HTML32:
0833: if ((this .versions & Dict.VERS_HTML32) != 0)
0834: return true;
0835:
0836: break; /* to replace old version by new */
0837:
0838: case Dict.VERS_HTML40_STRICT:
0839: if ((this .versions & Dict.VERS_HTML40_STRICT) != 0)
0840: return true;
0841:
0842: break; /* to replace old version by new */
0843:
0844: case Dict.VERS_HTML40_LOOSE:
0845: if ((this .versions & Dict.VERS_HTML40_LOOSE) != 0)
0846: return true;
0847:
0848: break; /* to replace old version by new */
0849:
0850: case Dict.VERS_FRAMES:
0851: if ((this .versions & Dict.VERS_FRAMES) != 0)
0852: return true;
0853:
0854: break; /* to replace old version by new */
0855: }
0856:
0857: /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */
0858: }
0859:
0860: /* choose new doctype */
0861: guessed = HTMLVersion();
0862: }
0863:
0864: if (guessed == Dict.VERS_UNKNOWN)
0865: return false;
0866:
0867: /* for XML use the Voyager system identifier */
0868: if (this .configuration.XmlOut || this .configuration.XmlTags
0869: || this .isvoyager) {
0870: if (doctype != null)
0871: Node.discardElement(doctype);
0872:
0873: for (i = 0; i < W3CVersion.length; ++i) {
0874: if (guessed == W3CVersion[i].code) {
0875: fixHTMLNameSpace(root, W3CVersion[i].profile);
0876: break;
0877: }
0878: }
0879:
0880: return true;
0881: }
0882:
0883: if (doctype == null) {
0884: doctype = newNode(Node.DocTypeTag, this .lexbuf, 0, 0);
0885: doctype.next = root.content;
0886: doctype.parent = root;
0887: doctype.prev = null;
0888: root.content = doctype;
0889: }
0890:
0891: this .txtstart = this .lexsize;
0892: this .txtend = this .lexsize;
0893:
0894: /* use the appropriate public identifier */
0895: addStringLiteral("html PUBLIC ");
0896:
0897: if (configuration.docTypeMode == Configuration.DOCTYPE_USER
0898: && configuration.docTypeStr != null)
0899: addStringLiteral(configuration.docTypeStr);
0900: else if (guessed == Dict.VERS_HTML20)
0901: addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\"");
0902: else {
0903: addStringLiteral("\"-//W3C//DTD ");
0904:
0905: for (i = 0; i < W3CVersion.length; ++i) {
0906: if (guessed == W3CVersion[i].code) {
0907: addStringLiteral(W3CVersion[i].name);
0908: break;
0909: }
0910: }
0911:
0912: addStringLiteral("//EN\"");
0913: }
0914:
0915: this .txtend = this .lexsize;
0916:
0917: doctype.start = this .txtstart;
0918: doctype.end = this .txtend;
0919:
0920: return true;
0921: }
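
/*
Example (illustrative): when the guessed version is HTML 2.0 and no
user doctype is configured, the code above stores the text
    html PUBLIC "-//IETF//DTD HTML 2.0//EN"
as the DocType node's content.
*/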
0922:
0923: /* ensure XML document starts with <?xml version="1.0"?> */
0924: public boolean fixXMLPI(Node root) {
0925: Node xml;
0926: int s;
0927:
0928: if (root.content != null
0929: && root.content.type == Node.ProcInsTag) {
0930: s = root.content.start;
0931:
0932: if (this .lexbuf[s] == (byte) 'x'
0933: && this .lexbuf[s + 1] == (byte) 'm'
0934: && this .lexbuf[s + 2] == (byte) 'l')
0935: return true;
0936: }
0937:
0938: xml = newNode(Node.ProcInsTag, this .lexbuf, 0, 0);
0939: xml.next = root.content;
0940:
0941: if (root.content != null) {
0942: root.content.prev = xml;
0943: xml.next = root.content;
0944: }
0945:
0946: root.content = xml;
0947:
0948: this .txtstart = this .lexsize;
0949: this .txtend = this .lexsize;
0950: addStringLiteral("xml version=\"1.0\"");
0951: if (this .configuration.CharEncoding == Configuration.LATIN1)
0952: addStringLiteral(" encoding=\"ISO-8859-1\"");
0953: this .txtend = this .lexsize;
0954:
0955: xml.start = this .txtstart;
0956: xml.end = this .txtend;
0957: return false;
0958: }
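
/*
Net effect (sketch): the inserted processing instruction node carries
the text xml version="1.0", plus encoding="ISO-8859-1" when the
configured character encoding is Latin-1, giving the document the XML
declaration described above.
*/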
0959:
0960: public Node inferredTag(String name) {
0961: Node node;
0962:
0963: node = newNode(Node.StartTag, this .lexbuf, this .txtstart,
0964: this .txtend, name);
0965: node.implicit = true;
0966: return node;
0967: }
0968:
0969: public static boolean expectsContent(Node node) {
0970: if (node.type != Node.StartTag)
0971: return false;
0972:
0973: /* unknown element? */
0974: if (node.tag == null)
0975: return true;
0976:
0977: if ((node.tag.model & Dict.CM_EMPTY) != 0)
0978: return false;
0979:
0980: return true;
0981: }
0982:
0983: /*
0984: create a text node for the contents of
0985: a CDATA element like style or script
0986: which ends with </foo> for some foo.
0987: */
0988: public Node getCDATA(Node container) {
0989: int c, lastc, start, len, i;
0990: String str;
0991: boolean endtag = false;
0992:
0993: this .lines = this .in.curline;
0994: this .columns = this .in.curcol;
0995: this .waswhite = false;
0996: this .txtstart = this .lexsize;
0997: this .txtend = this .lexsize;
0998:
0999: lastc = (int) '\0';
1000: start = -1;
1001:
1002: while (true) {
1003: c = this .in.readChar();
1004: if (c == StreamIn.EndOfStream)
1005: break;
1006: /* treat \r\n as \n and \r as \n */
1007:
1008: if (c == (int) '/' && lastc == (int) '<') {
1009: if (endtag) {
1010: this .lines = this .in.curline;
1011: this .columns = this .in.curcol - 3;
1012:
1013: Report.warning(this , null, null,
1014: Report.BAD_CDATA_CONTENT);
1015: }
1016:
1017: start = this .lexsize + 1; /* to first letter */
1018: endtag = true;
1019: } else if (c == (int) '>' && start >= 0) {
1020: len = this .lexsize - start;
1021: if (len == container.element.length()) {
1022: str = getString(this .lexbuf, start, len);
1023: if (Lexer.wstrcasecmp(str, container.element) == 0) {
1024: this .txtend = start - 2;
1025: break;
1026: }
1027: }
1028:
1029: this .lines = this .in.curline;
1030: this .columns = this .in.curcol - 3;
1031:
1032: Report.warning(this , null, null,
1033: Report.BAD_CDATA_CONTENT);
1034:
1035: /* if javascript insert backslash before / */
1036:
1037: if (ParserImpl.isJavaScript(container)) {
1038: for (i = this .lexsize; i > start - 1; --i)
1039: this .lexbuf[i] = this .lexbuf[i - 1];
1040:
1041: this .lexbuf[start - 1] = (byte) '\\';
1042: this .lexsize++;
1043: }
1044:
1045: start = -1;
1046: } else if (c == (int) '\r') {
1047: c = this .in.readChar();
1048:
1049: if (c != (int) '\n')
1050: this .in.ungetChar(c);
1051:
1052: c = (int) '\n';
1053: }
1054:
1055: addCharToLexer((int) c);
1056: this .txtend = this .lexsize;
1057: lastc = c;
1058: }
1059:
1060: if (c == StreamIn.EndOfStream)
1061: Report.warning(this , container, null,
1062: Report.MISSING_ENDTAG_FOR);
1063:
1064: if (this .txtend > this .txtstart) {
1065: this .token = newNode(Node.TextNode, this .lexbuf,
1066: this .txtstart, this .txtend);
1067: return this .token;
1068: }
1069:
1070: return null;
1071: }
1072:
1073: public void ungetToken() {
1074: this .pushed = true;
1075: }
1076:
1077: public static final short IgnoreWhitespace = 0;
1078: public static final short MixedContent = 1;
1079: public static final short Preformatted = 2;
1080: public static final short IgnoreMarkup = 3;
1081:
1082: /*
1083: modes for GetToken()
1084:
1085: MixedContent -- for elements which don't accept PCDATA
1086: Preformatted -- white space preserved as is
1087: IgnoreMarkup -- for CDATA elements such as script, style
1088: */
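
/*
Typical use (sketch only, assuming a configured Lexer instance named
lexer):

    Node t;
    while ((t = lexer.getToken(Lexer.IgnoreWhitespace)) != null) {
        // inspect t.type: TextNode, StartTag, EndTag, CommentTag, ...
    }

ungetToken() provides one level of undo, as noted in the header comment,
so the next getToken() call normally returns the same token again.
*/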
1089:
1090: public Node getToken(short mode) {
1091: short map;
1092: int c = 0;
1093: int lastc;
1094: int badcomment = 0;
1095: MutableBoolean isempty = new MutableBoolean();
1096: AttVal attributes;
1097:
1098: if (this .pushed) {
1099: /* duplicate inlines in preference to pushed text nodes when appropriate */
1100: if (this .token.type != Node.TextNode
1101: || (this .insert == -1 && this .inode == null)) {
1102: this .pushed = false;
1103: return this .token;
1104: }
1105: }
1106:
1107: /* at start of block elements, unclosed inline
1108: elements are inserted into the token stream */
1109:
1110: if (this .insert != -1 || this .inode != null)
1111: return insertedToken();
1112:
1113: this .lines = this .in.curline;
1114: this .columns = this .in.curcol;
1115: this .waswhite = false;
1116:
1117: this .txtstart = this .lexsize;
1118: this .txtend = this .lexsize;
1119:
1120: while (true) {
1121: c = this .in.readChar();
1122: if (c == StreamIn.EndOfStream)
1123: break;
1124: if (this .insertspace && mode != IgnoreWhitespace) {
1125: addCharToLexer(' ');
1126: this .waswhite = true;
1127: this .insertspace = false;
1128: }
1129:
1130: /* treat \r\n as \n and \r as \n */
1131:
1132: if (c == '\r') {
1133: c = this .in.readChar();
1134:
1135: if (c != '\n')
1136: this .in.ungetChar(c);
1137:
1138: c = '\n';
1139: }
1140:
1141: addCharToLexer(c);
1142:
1143: switch (this .state) {
1144: case LEX_CONTENT: /* element content */
1145: map = MAP((char) c);
1146:
1147: /*
1148: Discard white space if appropriate. It's cheaper
1149: to do this here rather than in parser methods
1150: for elements that don't have mixed content.
1151: */
1152: if (((map & WHITE) != 0) && (mode == IgnoreWhitespace)
1153: && this .lexsize == this .txtstart + 1) {
1154: --this .lexsize;
1155: this .waswhite = false;
1156: this .lines = this .in.curline;
1157: this .columns = this .in.curcol;
1158: continue;
1159: }
1160:
1161: if (c == '<') {
1162: this .state = LEX_GT;
1163: continue;
1164: }
1165:
1166: if ((map & WHITE) != 0) {
1167: /* was previous char white? */
1168: if (this .waswhite) {
1169: if (mode != Preformatted
1170: && mode != IgnoreMarkup) {
1171: --this .lexsize;
1172: this .lines = this .in.curline;
1173: this .columns = this .in.curcol;
1174: }
1175: } else /* prev char wasn't white */
1176: {
1177: this .waswhite = true;
1178: lastc = c;
1179:
1180: if (mode != Preformatted
1181: && mode != IgnoreMarkup && c != ' ')
1182: changeChar((byte) ' ');
1183: }
1184:
1185: continue;
1186: } else if (c == '&' && mode != IgnoreMarkup)
1187: parseEntity(mode);
1188:
1189: /* this is needed to avoid trimming trailing whitespace */
1190: if (mode == IgnoreWhitespace)
1191: mode = MixedContent;
1192:
1193: this .waswhite = false;
1194: continue;
1195:
1196: case LEX_GT: /* < */
1197:
1198: /* check for endtag */
1199: if (c == '/') {
1200: c = this .in.readChar();
1201: if (c == StreamIn.EndOfStream) {
1202: this .in.ungetChar(c);
1203: continue;
1204: }
1205:
1206: addCharToLexer(c);
1207: map = MAP((char) c);
1208:
1209: if ((map & LETTER) != 0) {
1210: this .lexsize -= 3;
1211: this .txtend = this .lexsize;
1212: this .in.ungetChar(c);
1213: this .state = LEX_ENDTAG;
1214: this .lexbuf[this .lexsize] = (byte) '\0'; /* debug */
1215: this .in.curcol -= 2;
1216:
1217: /* if some text before the </ return it now */
1218: if (this .txtend > this .txtstart) {
1219: /* trim space char before end tag */
1220: if (mode == IgnoreWhitespace
1221: && this .lexbuf[this .lexsize - 1] == (byte) ' ') {
1222: this .lexsize -= 1;
1223: this .txtend = this .lexsize;
1224: }
1225:
1226: this .token = newNode(Node.TextNode,
1227: this .lexbuf, this .txtstart,
1228: this .txtend);
1229: return this .token;
1230: }
1231:
1232: continue; /* no text so keep going */
1233: }
1234:
1235: /* otherwise treat as CDATA */
1236: this .waswhite = false;
1237: this .state = LEX_CONTENT;
1238: continue;
1239: }
1240:
1241: if (mode == IgnoreMarkup) {
1242: /* otherwise treat as CDATA */
1243: this .waswhite = false;
1244: this .state = LEX_CONTENT;
1245: continue;
1246: }
1247:
1248: /*
1249: look out for comments, doctype or marked sections
1250: this isn't quite right, but it's getting there ...
1251: */
1252: if (c == '!') {
1253: c = this .in.readChar();
1254:
1255: if (c == '-') {
1256: c = this .in.readChar();
1257:
1258: if (c == '-') {
1259: this .state = LEX_COMMENT; /* comment */
1260: this .lexsize -= 2;
1261: this .txtend = this .lexsize;
1262:
1263: /* if some text before < return it now */
1264: if (this .txtend > this .txtstart) {
1265: this .token = newNode(Node.TextNode,
1266: this .lexbuf, this .txtstart,
1267: this .txtend);
1268: return this .token;
1269: }
1270:
1271: this .txtstart = this .lexsize;
1272: continue;
1273: }
1274:
1275: Report.warning(this , null, null,
1276: Report.MALFORMED_COMMENT);
1277: } else if (c == 'd' || c == 'D') {
1278: this .state = LEX_DOCTYPE; /* doctype */
1279: this .lexsize -= 2;
1280: this .txtend = this .lexsize;
1281: mode = IgnoreWhitespace;
1282:
1283: /* skip until white space or '>' */
1284:
1285: for (;;) {
1286: c = this .in.readChar();
1287:
1288: if (c == StreamIn.EndOfStream || c == '>') {
1289: this .in.ungetChar(c);
1290: break;
1291: }
1292:
1293: map = MAP((char) c);
1294:
1295: if ((map & WHITE) == 0)
1296: continue;
1297:
1298: /* and skip to end of whitespace */
1299:
1300: for (;;) {
1301: c = this .in.readChar();
1302:
1303: if (c == StreamIn.EndOfStream
1304: || c == '>') {
1305: this .in.ungetChar(c);
1306: break;
1307: }
1308:
1309: map = MAP((char) c);
1310:
1311: if ((map & WHITE) != 0)
1312: continue;
1313:
1314: this .in.ungetChar(c);
1315: break;
1316: }
1317:
1318: break;
1319: }
1320:
1321: /* if some text before < return it now */
1322: if (this .txtend > this .txtstart) {
1323: this .token = newNode(Node.TextNode,
1324: this .lexbuf, this .txtstart,
1325: this .txtend);
1326: return this .token;
1327: }
1328:
1329: this .txtstart = this .lexsize;
1330: continue;
1331: } else if (c == '[') {
1332: /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
1333: this .lexsize -= 2;
1334: this .state = LEX_SECTION;
1335: this .txtend = this .lexsize;
1336:
1337: /* if some text before < return it now */
1338: if (this .txtend > this .txtstart) {
1339: this .token = newNode(Node.TextNode,
1340: this .lexbuf, this .txtstart,
1341: this .txtend);
1342: return this .token;
1343: }
1344:
1345: this .txtstart = this .lexsize;
1346: continue;
1347: }
1348:
1349: /* otherwise swallow chars up to and including next '>' */
1350: while (true) {
1351: c = this .in.readChar();
1352: if (c == '>')
1353: break;
1354: if (c == -1) {
1355: this .in.ungetChar(c);
1356: break;
1357: }
1358: }
1359:
1360: this .lexsize -= 2;
1361: this .lexbuf[this .lexsize] = (byte) '\0';
1362: this .state = LEX_CONTENT;
1363: continue;
1364: }
1365:
1366: /*
1367: processing instructions
1368: */
1369:
1370: if (c == '?') {
1371: this .lexsize -= 2;
1372: this .state = LEX_PROCINSTR;
1373: this .txtend = this .lexsize;
1374:
1375: /* if some text before < return it now */
1376: if (this .txtend > this .txtstart) {
1377: this .token = newNode(Node.TextNode,
1378: this .lexbuf, this .txtstart, this .txtend);
1379: return this .token;
1380: }
1381:
1382: this .txtstart = this .lexsize;
1383: continue;
1384: }
1385:
1386: /* Microsoft ASP, e.g. <% ... server-code ... %> */
1387: if (c == '%') {
1388: this .lexsize -= 2;
1389: this .state = LEX_ASP;
1390: this .txtend = this .lexsize;
1391:
1392: /* if some text before < return it now */
1393: if (this .txtend > this .txtstart) {
1394: this .token = newNode(Node.TextNode,
1395: this .lexbuf, this .txtstart, this .txtend);
1396: return this .token;
1397: }
1398:
1399: this .txtstart = this .lexsize;
1400: continue;
1401: }
1402:
1403: /* Netscape's JSTE, e.g. <# ... server-code ... #> */
1404: if (c == '#') {
1405: this .lexsize -= 2;
1406: this .state = LEX_JSTE;
1407: this .txtend = this .lexsize;
1408:
1409: /* if some text before < return it now */
1410: if (this .txtend > this .txtstart) {
1411: this .token = newNode(Node.TextNode,
1412: this .lexbuf, this .txtstart, this .txtend);
1413: return this .token;
1414: }
1415:
1416: this .txtstart = this .lexsize;
1417: continue;
1418: }
1419:
1420: map = MAP((char) c);
1421:
1422: /* check for start tag */
1423: if ((map & LETTER) != 0) {
1424: this .in.ungetChar(c); /* push back letter */
1425: this .lexsize -= 2; /* discard "<" + letter */
1426: this .txtend = this .lexsize;
1427: this .state = LEX_STARTTAG; /* ready to read tag name */
1428:
1429: /* if some text before < return it now */
1430: if (this .txtend > this .txtstart) {
1431: this .token = newNode(Node.TextNode,
1432: this .lexbuf, this .txtstart, this .txtend);
1433: return this .token;
1434: }
1435:
1436: continue; /* no text so keep going */
1437: }
1438:
1439: /* otherwise treat as CDATA */
1440: this .state = LEX_CONTENT;
1441: this .waswhite = false;
1442: continue;
1443:
1444: case LEX_ENDTAG: /* </letter */
1445: this .txtstart = this .lexsize - 1;
1446: this .in.curcol += 2;
1447: c = parseTagName();
1448: this .token = newNode(Node.EndTag, /* create endtag token */
1449: this .lexbuf, this .txtstart, this .txtend, getString(
1450: this .lexbuf, this .txtstart, this .txtend
1451: - this .txtstart));
1452: this .lexsize = this .txtstart;
1453: this .txtend = this .txtstart;
1454:
1455: /* skip to '>' */
1456: while (c != '>') {
1457: c = this .in.readChar();
1458:
1459: if (c == StreamIn.EndOfStream)
1460: break;
1461: }
1462:
1463: if (c == StreamIn.EndOfStream) {
1464: this .in.ungetChar(c);
1465: continue;
1466: }
1467:
1468: this .state = LEX_CONTENT;
1469: this .waswhite = false;
1470: return this .token; /* the endtag token */
1471:
1472: case LEX_STARTTAG: /* first letter of tagname */
1473: this .txtstart = this .lexsize - 1; /* set txtstart to first letter */
1474: c = parseTagName();
1475: isempty.value = false;
1476: attributes = null;
1477: this .token = newNode((isempty.value ? Node.StartEndTag
1478: : Node.StartTag), this .lexbuf, this .txtstart,
1479: this .txtend, getString(this .lexbuf,
1480: this .txtstart, this .txtend
1481: - this .txtstart));
1482:
1483: /* parse attributes, consuming closing ">" */
1484: if (c != '>') {
1485: if (c == '/')
1486: this .in.ungetChar(c);
1487:
1488: attributes = parseAttrs(isempty);
1489: }
1490:
1491: if (isempty.value)
1492: this .token.type = Node.StartEndTag;
1493:
1494: this .token.attributes = attributes;
1495: this .lexsize = this .txtstart;
1496: this .txtend = this .txtstart;
1497:
1498: /* swallow newline following start tag */
1499: /* special check needed for CRLF sequence */
1500: /* this doesn't apply to empty elements */
1501:
1502: if (expectsContent(this .token)
1503: || this .token.tag == configuration.tt.tagBr) {
1504:
1505: c = this .in.readChar();
1506:
1507: if (c == '\r') {
1508: c = this .in.readChar();
1509:
1510: if (c != '\n')
1511: this .in.ungetChar(c);
1512: } else if (c != '\n' && c != '\f')
1513: this .in.ungetChar(c);
1514:
1515: this .waswhite = true; /* to swallow leading whitespace */
1516: } else
1517: this .waswhite = false;
1518:
1519: this .state = LEX_CONTENT;
1520:
1521: if (this .token.tag == null)
1522: Report.error(this , null, this .token,
1523: Report.UNKNOWN_ELEMENT);
1524: else if (!this .configuration.XmlTags) {
1525: this .versions &= this .token.tag.versions;
1526:
1527: if ((this .token.tag.versions & Dict.VERS_PROPRIETARY) != 0) {
1528: if (!this .configuration.MakeClean
1529: && (this .token.tag == configuration.tt.tagNobr || this .token.tag == configuration.tt.tagWbr))
1530: Report.warning(this , null, this .token,
1531: Report.PROPRIETARY_ELEMENT);
1532: }
1533:
1534: if (this .token.tag.chkattrs != null) {
1535: this .token.checkUniqueAttributes(this );
1536: this .token.tag.chkattrs.check(this , this .token);
1537: } else
1538: this .token.checkAttributes(this );
1539: }
1540:
1541: return this .token; /* return start tag */
1542:
1543: case LEX_COMMENT: /* seen <!-- so look for --> */
1544:
1545: if (c != '-')
1546: continue;
1547:
1548: c = this .in.readChar();
1549: addCharToLexer(c);
1550:
1551: if (c != '-')
1552: continue;
1553:
1554: end_comment: while (true) {
1555: c = this .in.readChar();
1556:
1557: if (c == '>') {
1558: if (badcomment != 0)
1559: Report.warning(this , null, null,
1560: Report.MALFORMED_COMMENT);
1561:
1562: this .txtend = this .lexsize - 2; // AQ 8Jul2000
1563: this .lexbuf[this .lexsize] = (byte) '\0';
1564: this .state = LEX_CONTENT;
1565: this .waswhite = false;
1566: this .token = newNode(Node.CommentTag,
1567: this .lexbuf, this .txtstart, this .txtend);
1568:
1569: /* now look for a line break */
1570:
1571: c = this .in.readChar();
1572:
1573: if (c == '\r') {
1574: c = this .in.readChar();
1575:
1576: if (c != '\n')
1577: this .token.linebreak = true;
1578: }
1579:
1580: if (c == '\n')
1581: this .token.linebreak = true;
1582: else
1583: this .in.ungetChar(c);
1584:
1585: return this .token;
1586: }
1587:
1588: /* note position of first such error in the comment */
1589: if (badcomment == 0) {
1590: this .lines = this .in.curline;
1591: this .columns = this .in.curcol - 3;
1592: }
1593:
1594: badcomment++;
1595: if (this .configuration.FixComments)
1596: this .lexbuf[this .lexsize - 2] = (byte) '=';
1597:
1598: addCharToLexer(c);
1599:
1600: /* if '-' then look for '>' to end the comment */
1601: if (c != '-')
1602: break end_comment;
1603:
1604: }
1605: /* otherwise continue to look for --> */
1606: this .lexbuf[this .lexsize - 2] = (byte) '=';
1607: continue;
1608:
1609: case LEX_DOCTYPE: /* seen <!d so look for '>' munging whitespace */
1610: map = MAP((char) c);
1611:
1612: if ((map & WHITE) != 0) {
1613: if (this .waswhite)
1614: this .lexsize -= 1;
1615:
1616: this .waswhite = true;
1617: } else
1618: this .waswhite = false;
1619:
1620: if (c != '>')
1621: continue;
1622:
1623: this .lexsize -= 1;
1624: this .txtend = this .lexsize;
1625: this .lexbuf[this .lexsize] = (byte) '\0';
1626: this .state = LEX_CONTENT;
1627: this .waswhite = false;
1628: this .token = newNode(Node.DocTypeTag, this .lexbuf,
1629: this .txtstart, this .txtend);
1630: /* make a note of the version named by the doctype */
1631: this .doctype = findGivenVersion(this .token);
1632: return this .token;
1633:
1634: case LEX_PROCINSTR: /* seen <? so look for '>' */
1635: /* check for PHP preprocessor instructions <?php ... ?> */
1636:
1637: if (this .lexsize - this .txtstart == 3) {
1638: if ((getString(this .lexbuf, this .txtstart, 3))
1639: .equals("php")) {
1640: this .state = LEX_PHP;
1641: continue;
1642: }
1643: }
1644:
1645: if (this .configuration.XmlPIs) /* insist on ?> as terminator */
1646: {
1647: if (c != '?')
1648: continue;
1649:
1650: /* now look for '>' */
1651: c = this .in.readChar();
1652:
1653: if (c == StreamIn.EndOfStream) {
1654: Report.warning(this , null, null,
1655: Report.UNEXPECTED_END_OF_FILE);
1656: this .in.ungetChar(c);
1657: continue;
1658: }
1659:
1660: addCharToLexer(c);
1661: }
1662:
1663: if (c != '>')
1664: continue;
1665:
1666: this .lexsize -= 1;
1667: this .txtend = this .lexsize;
1668: this .lexbuf[this .lexsize] = (byte) '\0';
1669: this .state = LEX_CONTENT;
1670: this .waswhite = false;
1671: this .token = newNode(Node.ProcInsTag, this .lexbuf,
1672: this .txtstart, this .txtend);
1673: return this .token;
1674:
1675: case LEX_ASP: /* seen <% so look for "%>" */
1676: if (c != '%')
1677: continue;
1678:
1679: /* now look for '>' */
1680: c = this .in.readChar();
1681:
1682: if (c != '>') {
1683: this .in.ungetChar(c);
1684: continue;
1685: }
1686:
1687: this .lexsize -= 1;
1688: this .txtend = this .lexsize;
1689: this .lexbuf[this .lexsize] = (byte) '\0';
1690: this .state = LEX_CONTENT;
1691: this .waswhite = false;
1692: this .token = newNode(Node.AspTag, this .lexbuf,
1693: this .txtstart, this .txtend);
1694: return this .token;
1695:
1696: case LEX_JSTE: /* seen <# so look for "#>" */
1697: if (c != '#')
1698: continue;
1699:
1700: /* now look for '>' */
1701: c = this .in.readChar();
1702:
1703: if (c != '>') {
1704: this .in.ungetChar(c);
1705: continue;
1706: }
1707:
1708: this .lexsize -= 1;
1709: this .txtend = this .lexsize;
1710: this .lexbuf[this .lexsize] = (byte) '\0';
1711: this .state = LEX_CONTENT;
1712: this .waswhite = false;
1713: this .token = newNode(Node.JsteTag, this .lexbuf,
1714: this .txtstart, this .txtend);
1715: return this .token;
1716:
1717: case LEX_PHP: /* seen "<?php" so look for "?>" */
1718: if (c != '?')
1719: continue;
1720:
1721: /* now look for '>' */
1722: c = this .in.readChar();
1723:
1724: if (c != '>') {
1725: this .in.ungetChar(c);
1726: continue;
1727: }
1728:
1729: this .lexsize -= 1;
1730: this .txtend = this .lexsize;
1731: this .lexbuf[this .lexsize] = (byte) '\0';
1732: this .state = LEX_CONTENT;
1733: this .waswhite = false;
1734: this .token = newNode(Node.PhpTag, this .lexbuf,
1735: this .txtstart, this .txtend);
1736: return this .token;
1737:
1738: case LEX_SECTION: /* seen "<![" so look for "]>" */
1739: if (c == '[') {
1740: if (this .lexsize == (this .txtstart + 6)
1741: && (getString(this .lexbuf, this .txtstart, 6))
1742: .equals("CDATA[")) {
1743: this .state = LEX_CDATA;
1744: this .lexsize -= 6;
1745: continue;
1746: }
1747: }
1748:
1749: if (c != ']')
1750: continue;
1751:
1752: /* now look for '>' */
1753: c = this .in.readChar();
1754:
1755: if (c != '>') {
1756: this .in.ungetChar(c);
1757: continue;
1758: }
1759:
1760: this .lexsize -= 1;
1761: this .txtend = this .lexsize;
1762: this .lexbuf[this .lexsize] = (byte) '\0';
1763: this .state = LEX_CONTENT;
1764: this .waswhite = false;
1765: this .token = newNode(Node.SectionTag, this .lexbuf,
1766: this .txtstart, this .txtend);
1767: return this .token;
1768:
1769: case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */
1770: if (c != ']')
1771: continue;
1772:
1773: /* now look for ']' */
1774: c = this .in.readChar();
1775:
1776: if (c != ']') {
1777: this .in.ungetChar(c);
1778: continue;
1779: }
1780:
1781: /* now look for '>' */
1782: c = this .in.readChar();
1783:
1784: if (c != '>') {
1785: this .in.ungetChar(c);
1786: continue;
1787: }
1788:
1789: this .lexsize -= 1;
1790: this .txtend = this .lexsize;
1791: this .lexbuf[this .lexsize] = (byte) '\0';
1792: this .state = LEX_CONTENT;
1793: this .waswhite = false;
1794: this .token = newNode(Node.CDATATag, this .lexbuf,
1795: this .txtstart, this .txtend);
1796: return this .token;
1797: }
1798: }
1799:
1800: if (this .state == LEX_CONTENT) /* text string */
1801: {
1802: this .txtend = this .lexsize;
1803:
1804: if (this .txtend > this .txtstart) {
1805: this .in.ungetChar(c);
1806:
1807: if (this .lexbuf[this .lexsize - 1] == (byte) ' ') {
1808: this .lexsize -= 1;
1809: this .txtend = this .lexsize;
1810: }
1811:
1812: this .token = newNode(Node.TextNode, this .lexbuf,
1813: this .txtstart, this .txtend);
1814: return this .token;
1815: }
1816: } else if (this .state == LEX_COMMENT) /* comment */
1817: {
1818: if (c == StreamIn.EndOfStream)
1819: Report.warning(this , null, null,
1820: Report.MALFORMED_COMMENT);
1821:
1822: this .txtend = this .lexsize;
1823: this .lexbuf[this .lexsize] = (byte) '\0';
1824: this .state = LEX_CONTENT;
1825: this .waswhite = false;
1826: this .token = newNode(Node.CommentTag, this .lexbuf,
1827: this .txtstart, this .txtend);
1828: return this .token;
1829: }
1830:
1831: return null;
1832: }
1833:
1834: /*
1835: parser for ASP within start tags
1836:
1837: Some people use ASP to customize attributes.
1838: Tidy isn't really well suited to dealing with ASP.
1839: This is a workaround for attributes, but won't
1840: deal with the case where the ASP is used to tailor
1841: the attribute value. Here is an example of a
1842: workaround for using ASP in attribute values:
1843:
1844: href="<%=rsSchool.Fields("ID").Value%>"
1845:
1846: where the ASP that generates the attribute value
1847: is masked from Tidy by the quotemarks.
1848:
1849: */
1850:
1851: public Node parseAsp() {
1852: int c;
1853: Node asp = null;
1854:
1855: this .txtstart = this .lexsize;
1856:
1857: for (;;) {
1858: c = this .in.readChar();
1859: addCharToLexer(c);
1860:
1861: if (c != '%')
1862: continue;
1863:
1864: c = this .in.readChar();
1865: addCharToLexer(c);
1866:
1867: if (c == '>')
1868: break;
1869: }
1870:
1871: this .lexsize -= 2;
1872: this .txtend = this .lexsize;
1873:
1874: if (this .txtend > this .txtstart)
1875: asp = newNode(Node.AspTag, this .lexbuf, this .txtstart,
1876: this .txtend);
1877:
1878: this .txtstart = this .txtend;
1879: return asp;
1880: }
1881:
1882: /*
1883: PHP is like ASP but is based upon XML
1884: processing instructions, e.g. <?php ... ?>
1885: */
1886: public Node parsePhp() {
1887: int c;
1888: Node php = null;
1889:
1890: this .txtstart = this .lexsize;
1891:
1892: for (;;) {
1893: c = this .in.readChar();
1894: addCharToLexer(c);
1895:
1896: if (c != '?')
1897: continue;
1898:
1899: c = this .in.readChar();
1900: addCharToLexer(c);
1901:
1902: if (c == '>')
1903: break;
1904: }
1905:
1906: this .lexsize -= 2;
1907: this .txtend = this .lexsize;
1908:
1909: if (this .txtend > this .txtstart)
1910: php = newNode(Node.PhpTag, this .lexbuf, this .txtstart,
1911: this .txtend);
1912:
1913: this .txtstart = this .txtend;
1914: return php;
1915: }
1916:
1917: /* consumes the '>' terminating start tags */
1918: public String parseAttribute(MutableBoolean isempty,
1919: MutableObject asp, MutableObject php) {
1920: int start = 0;
1921: // int len = 0; Removed by BUGFIX for 126265
1922: short map;
1923: String attr;
1924: int c = 0;
1925:
1926: asp.setObject(null); /* clear asp pointer */
1927: php.setObject(null); /* clear php pointer */
1928: /* skip white space before the attribute */
1929:
1930: for (;;) {
1931: c = this .in.readChar();
1932:
1933: if (c == '/') {
1934: c = this .in.readChar();
1935:
1936: if (c == '>') {
1937: isempty.value = true;
1938: return null;
1939: }
1940:
1941: this .in.ungetChar(c);
1942: c = '/';
1943: break;
1944: }
1945:
1946: if (c == '>')
1947: return null;
1948:
1949: if (c == '<') {
1950: c = this .in.readChar();
1951:
1952: if (c == '%') {
1953: asp.setObject(parseAsp());
1954: return null;
1955: } else if (c == '?') {
1956: php.setObject(parsePhp());
1957: return null;
1958: }
1959:
1960: this .in.ungetChar(c);
1961: Report.attrError(this , this .token, null,
1962: Report.UNEXPECTED_GT);
1963: return null;
1964: }
1965:
1966: if (c == '"' || c == '\'') {
1967: Report.attrError(this , this .token, null,
1968: Report.UNEXPECTED_QUOTEMARK);
1969: continue;
1970: }
1971:
1972: if (c == StreamIn.EndOfStream) {
1973: Report.attrError(this , this .token, null,
1974: Report.UNEXPECTED_END_OF_FILE);
1975: this .in.ungetChar(c);
1976: return null;
1977: }
1978:
1979: map = MAP((char) c);
1980:
1981: if ((map & WHITE) == 0)
1982: break;
1983: }
1984:
1985: start = this .lexsize;
1986:
1987: for (;;) {
1988: /* but push back '=' for parseValue() */
1989: if (c == '=' || c == '>') {
1990: this .in.ungetChar(c);
1991: break;
1992: }
1993:
1994: if (c == '<' || c == StreamIn.EndOfStream) {
1995: this .in.ungetChar(c);
1996: break;
1997: }
1998:
1999: map = MAP((char) c);
2000:
2001: if ((map & WHITE) != 0)
2002: break;
2003:
2004: /* what should be done about non-namechar characters? */
2005: /* currently these are incorporated into the attr name */
2006:
2007: if (!this .configuration.XmlTags && (map & UPPERCASE) != 0)
2008: c += (int) ('a' - 'A');
2009:
2010: // ++len; Removed by BUGFIX for 126265
2011: addCharToLexer(c);
2012:
2013: c = this .in.readChar();
2014: }
2015:
2016: // Following line added by GLP to fix BUG 126265. This is a temporary comment
2017: // and should be removed when Tidy is fixed.
2018: int len = this .lexsize - start;
2019: attr = (len > 0 ? getString(this .lexbuf, start, len) : null);
2020: this .lexsize = start;
2021:
2022: return attr;
2023: }
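
/*
Example (illustrative): given the remaining input ' HREF = "x.html">',
parseAttribute() returns "href" (folded to lower case unless XmlTags is
set) and pushes the '=' back so that parseValue() can read the value.
*/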
2024:
2025: /*
2026: invoked when '<' is seen in place of an attribute value,
2027: but terminates on whitespace if not ASP, PHP or Tango;
2028: this routine recognizes ' and " quoted strings
2029: */
2030: public int parseServerInstruction() {
2031: int c, map, delim = '"';
2032: boolean isrule = false;
2033:
2034: c = this .in.readChar();
2035: addCharToLexer(c);
2036:
2037: /* check for ASP, PHP or Tango */
2038: if (c == '%' || c == '?' || c == '@')
2039: isrule = true;
2040:
2041: for (;;) {
2042: c = this .in.readChar();
2043:
2044: if (c == StreamIn.EndOfStream)
2045: break;
2046:
2047: if (c == '>') {
2048: if (isrule)
2049: addCharToLexer(c);
2050: else
2051: this .in.ungetChar(c);
2052:
2053: break;
2054: }
2055:
2056: /* if not recognized as ASP, PHP or Tango */
2057: /* then also finish value on whitespace */
2058: if (!isrule) {
2059: map = MAP((char) c);
2060:
2061: if ((map & WHITE) != 0)
2062: break;
2063: }
2064:
2065: addCharToLexer(c);
2066:
2067: if (c == '"') {
2068: do {
2069: c = this .in.readChar();
2070: addCharToLexer(c);
2071: } while (c != '"');
2072: delim = '\'';
2073: continue;
2074: }
2075:
2076: if (c == '\'') {
2077: do {
2078: c = this .in.readChar();
2079: addCharToLexer(c);
2080: } while (c != '\'');
2081: }
2082: }
2083:
2084: return delim;
2085: }
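/*
 Example (informal): for a value written as <%=user.name%> the whole
 instruction, including the closing '>', is copied into the lexer buffer.
 The return value is the quote mark the caller should later wrap around
 the attribute value: '\'' if the instruction itself contained a '"',
 otherwise the default '"'.
*/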
2086:
2087: /* values start with "=" or " = " etc. */
2088: /* doesn't consume the ">" at end of start tag */
2089:
2090: public String parseValue(String name, boolean foldCase,
2091: MutableBoolean isempty, MutableInteger pdelim) {
2092: int len = 0;
2093: int start;
2094: short map;
2095: boolean seen_gt = false;
2096: boolean munge = true;
2097: int c = 0;
2098: int lastc, delim, quotewarning;
2099: String value;
2100:
2101: delim = 0;
2102: pdelim.value = (int) '"';
2103:
2104: /*
2105: Henry Zrepa reports that some folk are using the
2106: embed element with script attributes where newlines
2107: are significant and must be preserved
2108: */
2109: if (configuration.LiteralAttribs)
2110: munge = false;
2111:
2112: /* skip white space before the '=' */
2113:
2114: for (;;) {
2115: c = this .in.readChar();
2116:
2117: if (c == StreamIn.EndOfStream) {
2118: this .in.ungetChar(c);
2119: break;
2120: }
2121:
2122: map = MAP((char) c);
2123:
2124: if ((map & WHITE) == 0)
2125: break;
2126: }
2127:
2128: /*
2129: c should be '=' if there is a value
2130: other legal possibilities are white
2131: space, '/' and '>'
2132: */
2133:
2134: if (c != '=') {
2135: this .in.ungetChar(c);
2136: return null;
2137: }
2138:
2139: /* skip white space after '=' */
2140:
2141: for (;;) {
2142: c = this .in.readChar();
2143:
2144: if (c == StreamIn.EndOfStream) {
2145: this .in.ungetChar(c);
2146: break;
2147: }
2148:
2149: map = MAP((char) c);
2150:
2151: if ((map & WHITE) == 0)
2152: break;
2153: }
2154:
2155: /* check for quote marks */
2156:
2157: if (c == '"' || c == '\'')
2158: delim = c;
2159: else if (c == '<') {
2160: start = this .lexsize;
2161: addCharToLexer(c);
2162: pdelim.value = parseServerInstruction();
2163: len = this .lexsize - start;
2164: this .lexsize = start;
2165: return (len > 0 ? getString(this .lexbuf, start, len) : null);
2166: } else
2167: this .in.ungetChar(c);
2168:
2169: /*
2170: and read the value string
2171: check for quote mark if needed
2172: */
2173:
2174: quotewarning = 0;
2175: start = this .lexsize;
2176: c = '\0';
2177:
2178: for (;;) {
2179: lastc = c; /* track last character */
2180: c = this .in.readChar();
2181:
2182: if (c == StreamIn.EndOfStream) {
2183: Report.attrError(this , this .token, null,
2184: Report.UNEXPECTED_END_OF_FILE);
2185: this .in.ungetChar(c);
2186: break;
2187: }
2188:
2189: if (delim == (char) 0) {
2190: if (c == '>') {
2191: this .in.ungetChar(c);
2192: break;
2193: }
2194:
2195: if (c == '"' || c == '\'') {
2196: Report.attrError(this , this .token, null,
2197: Report.UNEXPECTED_QUOTEMARK);
2198: break;
2199: }
2200:
2201: if (c == '<') {
2202: /* this.in.ungetChar(c); */
2203: Report.attrError(this , this .token, null,
2204: Report.UNEXPECTED_GT);
2205: /* break; */
2206: }
2207:
2208: /*
2209: For cases like <br clear=all/> need to avoid treating /> as
2210: part of the attribute value, however care is needed to avoid
2211: so treating <a href=http://www.acme.com/> in this way, which
2212: would map the <a> tag to <a href="http://www.acme.com"/>
2213: */
2214: if (c == '/') {
2215: /* peek ahead in case of /> */
2216: c = this .in.readChar();
2217:
2218: if (c == '>'
2219: && !AttributeTable.getDefaultAttributeTable().isUrl(name)) {
2222: isempty.value = true;
2223: this .in.ungetChar(c);
2224: break;
2225: }
2226:
2227: /* unget peeked char */
2228: this .in.ungetChar(c);
2229: c = '/';
2230: }
2231: } else /* delim is '\'' or '"' */
2232: {
2233: if (c == delim)
2234: break;
2235:
2236: /* treat CRLF, CR and LF as single line break */
2237:
2238: if (c == '\r') {
2239: c = this .in.readChar();
2240: if (c != '\n')
2241: this .in.ungetChar(c);
2242:
2243: c = '\n';
2244: }
2245:
2246: if (c == '\n' || c == '<' || c == '>')
2247: ++quotewarning;
2248:
2249: if (c == '>')
2250: seen_gt = true;
2251: }
2252:
2253: if (c == '&') {
2254: addCharToLexer(c);
2255: parseEntity((short) 0);
2256: continue;
2257: }
2258:
2259: /*
2260: kludge for JavaScript attribute values
2261: with line continuations in string literals
2262: */
2263: if (c == '\\') {
2264: c = this .in.readChar();
2265:
2266: if (c != '\n') {
2267: this .in.ungetChar(c);
2268: c = '\\';
2269: }
2270: }
2271:
2272: map = MAP((char) c);
2273:
2274: if ((map & WHITE) != 0) {
2275: if (delim == (char) 0)
2276: break;
2277:
2278: if (munge) {
2279: c = ' ';
2280:
2281: if (lastc == ' ')
2282: continue;
2283: }
2284: } else if (foldCase && (map & UPPERCASE) != 0)
2285: c += (int) ('a' - 'A');
2286:
2287: addCharToLexer(c);
2288: }
2289:
2290: if (quotewarning > 10 && seen_gt && munge) {
2291: /*
2292: there is almost certainly a missing trailing quote mark,
2293: as we have seen too many newlines, '<' or '>' characters.
2294:
2295: an exception is made for Javascript attributes and the
2296: javascript URL scheme which may legitimately include < and >
2297: */
2298: if (!AttributeTable.getDefaultAttributeTable().isScript(name)
2299: && !(AttributeTable.getDefaultAttributeTable().isUrl(name)
2300: && getString(this .lexbuf, start, 11).equals("javascript:")))
2303: Report.error(this , null, null,
2304: Report.SUSPECTED_MISSING_QUOTE);
2305: }
2306:
2307: len = this .lexsize - start;
2308: this .lexsize = start;
2309:
2310: if (len > 0 || delim != 0)
2311: value = getString(this .lexbuf, start, len);
2312: else
2313: value = null;
2314:
2315: /* note delimiter if given */
2316: if (delim != 0)
2317: pdelim.value = delim;
2318: else
2319: pdelim.value = (int) '"';
2320:
2321: return value;
2322: }
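/*
 Example (informal): alt="a > b" is returned verbatim with pdelim set to '"',
 since '<' and '>' are legal inside a quoted value (they only feed the
 missing-quote heuristic above); an unquoted value such as width=100 ends at
 white space or '>'; and for clear=all/> the trailing "/>" is not swallowed
 into the value, isempty is set instead (URL attributes keep a trailing '/').
*/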
2323:
2324: /* attr must be non-null */
2325: public static boolean isValidAttrName(String attr) {
2326: short map;
2327: char c;
2328: int i;
2329:
2330: /* first character should be a letter */
2331: c = attr.charAt(0);
2332: map = MAP(c);
2333:
2334: if (!((map & LETTER) != 0))
2335: return false;
2336:
2337: /* remaining characters should be namechars */
2338: for (i = 1; i < attr.length(); i++) {
2339: c = attr.charAt(i);
2340: map = MAP(c);
2341:
2342: if ((map & NAMECHAR) != 0)
2343: continue;
2344:
2345: return false;
2346: }
2347:
2348: return true;
2349: }
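/*
 Example (informal): "xml:lang", "data-x" and "size2" pass (a letter followed
 by letters, digits or "-.:_"); "2cols" and "-foo" fail because the first
 character must be a letter.
*/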
2350:
2351: /* swallows closing '>' */
2352:
2353: public AttVal parseAttrs(MutableBoolean isempty) {
2354: AttVal av, list;
2355: String attribute, value;
2356: MutableInteger delim = new MutableInteger();
2357: MutableObject asp = new MutableObject();
2358: MutableObject php = new MutableObject();
2359:
2360: list = null;
2361:
2362: for (; !endOfInput();) {
2363: attribute = parseAttribute(isempty, asp, php);
2364:
2365: if (attribute == null) {
2366: /* check if attributes are created by ASP markup */
2367: if (asp.getObject() != null) {
2368: av = new AttVal(list, null, (Node) asp.getObject(),
2369: null, '\0', null, null);
2370: list = av;
2371: continue;
2372: }
2373:
2374: /* check if attributes are created by PHP markup */
2375: if (php.getObject() != null) {
2376: av = new AttVal(list, null, null,
2377: (Node) php.getObject(), '\0', null, null);
2378: list = av;
2379: continue;
2380: }
2381:
2382: break;
2383: }
2384:
2385: value = parseValue(attribute, false, isempty, delim);
2386:
2387: if (attribute != null && isValidAttrName(attribute)) {
2388: av = new AttVal(list, null, null, null, delim.value,
2389: attribute, value);
2390: av.dict = AttributeTable.getDefaultAttributeTable()
2391: .findAttribute(av);
2392: list = av;
2393: } else {
2394: av = new AttVal(null, null, null, null, 0, attribute,
2395: value);
2396: Report.attrError(this , this .token, value,
2397: Report.BAD_ATTRIBUTE_VALUE);
2398: }
2399: }
2400:
2401: return list;
2402: }
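/*
 A minimal sketch of consuming the result (hypothetical caller code, assuming
 the AttVal fields next/attribute/value taken by its constructor above); note
 that each new AttVal is linked in front of the previous one, so the head of
 the returned list is the attribute that appeared last in the tag:

     for (AttVal av = lexer.parseAttrs(isempty); av != null; av = av.next)
         System.out.println(av.attribute + "=" + av.value);
*/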
2403:
2404: /*
2405: push a copy of an inline node onto stack
2406: but don't push if implicit or OBJECT or APPLET
2407: (implicit tags are ones generated from the istack)
2408:
2409: One issue arises with pushing inlines when
2410: the tag is already pushed. For instance:
2411:
2412: <p><em>text
2413: <p><em>more text
2414:
2415: Shouldn't be mapped to
2416:
2417: <p><em>text</em></p>
2418: <p><em><em>more text</em></em>
2419: */
2420: public void pushInline(Node node) {
2421: IStack is;
2422:
2423: if (node.implicit)
2424: return;
2425:
2426: if (node.tag == null)
2427: return;
2428:
2429: if ((node.tag.model & Dict.CM_INLINE) == 0)
2430: return;
2431:
2432: if ((node.tag.model & Dict.CM_OBJECT) != 0)
2433: return;
2434:
2435: if (node.tag != configuration.tt.tagFont && isPushed(node))
2436: return;
2437:
2438: // record the tag, element name and a copy of the attributes on the inline stack
2439: is = new IStack();
2440: is.tag = node.tag;
2441: is.element = node.element;
2442: if (node.attributes != null)
2443: is.attributes = cloneAttributes(node.attributes);
2444: this .istack.push(is);
2445: }
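/*
 Example (informal): after lexing <p><em>, pushInline records the em element
 (its tag and a copy of its attributes) on istack so that a later
 inlineDup()/insertedToken() pass can re-open <em> inside the next block;
 a second <em> is not pushed again (see isPushed below), but font elements
 are always pushed so that nested size/color changes can be replayed.
*/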
2446:
2447: /* pop inline stack */
2448: public void popInline(Node node) {
2449: AttVal av;
2450: IStack is;
2451:
2452: if (node != null) {
2453:
2454: if (node.tag == null)
2455: return;
2456:
2457: if ((node.tag.model & Dict.CM_INLINE) == 0)
2458: return;
2459:
2460: if ((node.tag.model & Dict.CM_OBJECT) != 0)
2461: return;
2462:
2463: // if node is </a> then pop until we find an <a>
2464: if (node.tag == configuration.tt.tagA) {
2465:
2466: while (this .istack.size() > 0) {
2467: is = (IStack) this .istack.pop();
2468: if (is.tag == configuration.tt.tagA) {
2469: break;
2470: }
2471: }
2472:
2473: if (this .insert >= this .istack.size())
2474: this .insert = -1;
2475: return;
2476: }
2477: }
2478:
2479: if (this .istack.size() > 0) {
2480: is = (IStack) this .istack.pop();
2481: if (this .insert >= this .istack.size())
2482: this .insert = -1;
2483: }
2484: }
2485:
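/* true if an element with the same tag as node is currently on the inline stack */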
2486: public boolean isPushed(Node node) {
2487: int i;
2488: IStack is;
2489:
2490: for (i = this .istack.size() - 1; i >= 0; --i) {
2491: is = (IStack) this .istack.elementAt(i);
2492: if (is.tag == node.tag)
2493: return true;
2494: }
2495:
2496: return false;
2497: }
2498:
2499: /*
2500: This has the effect of inserting "missing" inline
2501: elements around the contents of blocklevel elements
2502: such as P, TD, TH, DIV, PRE etc. This procedure is
2503: called at the start of ParseBlock when the inline
2504: stack is not empty, as will be the case in:
2505:
2506: <i><h1>italic heading</h1></i>
2507:
2508: which is then treated as equivalent to
2509:
2510: <h1><i>italic heading</i></h1>
2511:
2512: This is implemented by setting the lexer into a mode
2513: where it gets tokens from the inline stack rather than
2514: from the input stream.
2515: */
2516: public int inlineDup(Node node) {
2517: int n;
2518:
2519: n = this .istack.size() - this .istackbase;
2520: if (n > 0) {
2521: this .insert = this .istackbase;
2522: this .inode = node;
2523: }
2524:
2525: return n;
2526: }
2527:
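/*
 while the lexer is replaying the inline stack (insert != -1, as set up by
 inlineDup) this synthesizes an implicit start tag from the next istack entry;
 once the stack entries have been replayed, the deferred node saved in inode
 is returned
*/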
2528: public Node insertedToken() {
2529: Node node;
2530: IStack is;
2531: int n;
2532:
2533: // insert will only be -1 here if inode != null
2534: if (this .insert == -1) {
2535: node = this .inode;
2536: this .inode = null;
2537: return node;
2538: }
2539:
2540: // if this is the "latest" node then update
2541: // the position, otherwise use current values
2542:
2543: if (this .inode == null) {
2544: this .lines = this .in.curline;
2545: this .columns = this .in.curcol;
2546: }
2547:
2548: node = newNode(Node.StartTag, this .lexbuf, this .txtstart,
2549: this .txtend); // GLP: Bugfix 126261. Remove when this change
2550: // is fixed in istack.c in the original Tidy
2551: node.implicit = true;
2552: is = (IStack) this .istack.elementAt(this .insert);
2553: node.element = is.element;
2554: node.tag = is.tag;
2555: if (is.attributes != null)
2556: node.attributes = cloneAttributes(is.attributes);
2557:
2558: // advance lexer to next item on the stack
2559: n = this .insert;
2560:
2561: // and recover state if we have reached the end
2562: if (++n < this .istack.size()) {
2563: this .insert = n;
2564: } else {
2565: this .insert = -1;
2566: }
2567:
2568: return node;
2569: }
2570:
2571: /* AQ: Try this for speed optimization */
2572: public static int wstrcasecmp(String s1, String s2) {
2573: return (s1.equalsIgnoreCase(s2) ? 0 : 1);
2574: }
2575:
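/*
 case-insensitive lexicographic comparison: returns 0 for equal strings,
 -1 or 1 when one string is a prefix of the other, otherwise the sign of
 the raw character difference at the first mismatching position
*/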
2576: public static int wstrcaselexcmp(String s1, String s2) {
2577: char c;
2578: int i = 0;
2579:
2580: while (i < s1.length() && i < s2.length()) {
2581: c = s1.charAt(i);
2582: if (toLower(c) != toLower(s2.charAt(i))) {
2583: break;
2584: }
2585: i += 1;
2586: }
2587: if (i == s1.length() && i == s2.length()) {
2588: return 0;
2589: } else if (i == s1.length()) {
2590: return -1;
2591: } else if (i == s2.length()) {
2592: return 1;
2593: } else {
2594: return (s1.charAt(i) > s2.charAt(i) ? 1 : -1);
2595: }
2596: }
2597:
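/* case-insensitive test for s2 occurring anywhere within s1 */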
2598: public static boolean wsubstr(String s1, String s2) {
2599: int i;
2600: int len1 = s1.length();
2601: int len2 = s2.length();
2602:
2603: for (i = 0; i <= len1 - len2; ++i) {
2604: if (s1.regionMatches(true, i, s2, 0, len2))
2605: return true;
2606: }
2607:
2608: return false;
2609: }
2610:
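/*
 true when an empty element can be discarded: text nodes always can;
 an element is kept if it still has content, is an <a> with attributes,
 is a <p> while DropEmptyParas is off, has no known tag, belongs to the
 table row content model, is an applet or object, or carries an id or
 name attribute
*/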
2611: public boolean canPrune(Node element) {
2612: if (element.type == Node.TextNode)
2613: return true;
2614:
2615: if (element.content != null)
2616: return false;
2617:
2618: if (element.tag == configuration.tt.tagA
2619: && element.attributes != null)
2620: return false;
2621:
2622: if (element.tag == configuration.tt.tagP
2623: && !this .configuration.DropEmptyParas)
2624: return false;
2625:
2626: if (element.tag == null)
2627: return false;
2628:
2629: if ((element.tag.model & Dict.CM_ROW) != 0)
2630: return false;
2631:
2632: if (element.tag == configuration.tt.tagApplet)
2633: return false;
2634:
2635: if (element.tag == configuration.tt.tagObject)
2636: return false;
2637:
2638: if (element.attributes != null
2639: && (element.getAttrByName("id") != null
2640: || element.getAttrByName("name") != null))
2641: return false;
2642:
2643: return true;
2644: }
2645:
2646: /* duplicate name attribute as an id */
2647: public void fixId(Node node) {
2648: AttVal name = node.getAttrByName("name");
2649: AttVal id = node.getAttrByName("id");
2650:
2651: if (name != null) {
2652: if (id != null) {
2653: if (!id.value.equals(name.value))
2654: Report.attrError(this , node, "name",
2655: Report.ID_NAME_MISMATCH);
2656: } else if (this .configuration.XmlOut)
2657: node.addAttribute("id", name.value);
2658: }
2659: }
2660:
2661: /*
2662: defer duplicates when entering a table or other
2663: element where the inlines shouldn't be duplicated
2664: */
2665: public void deferDup() {
2666: this .insert = -1;
2667: this .inode = null;
2668: }
2669:
2670: /* Private methods and fields */
2671:
2672: /* lexer char types */
2673: private static final short DIGIT = 1;
2674: private static final short LETTER = 2;
2675: private static final short NAMECHAR = 4;
2676: private static final short WHITE = 8;
2677: private static final short NEWLINE = 16;
2678: private static final short LOWERCASE = 32;
2679: private static final short UPPERCASE = 64;
2680:
2681: /* lexer GetToken states */
2682:
2683: private static final short LEX_CONTENT = 0;
2684: private static final short LEX_GT = 1;
2685: private static final short LEX_ENDTAG = 2;
2686: private static final short LEX_STARTTAG = 3;
2687: private static final short LEX_COMMENT = 4;
2688: private static final short LEX_DOCTYPE = 5;
2689: private static final short LEX_PROCINSTR = 6;
2690: private static final short LEX_ENDCOMMENT = 7;
2691: private static final short LEX_CDATA = 8;
2692: private static final short LEX_SECTION = 9;
2693: private static final short LEX_ASP = 10;
2694: private static final short LEX_JSTE = 11;
2695: private static final short LEX_PHP = 12;
2696:
2697: /* used to classify chars for lexical purposes */
2698: private static short[] lexmap = new short[128];
2699:
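/* OR the given character-class bits into lexmap for every character of str */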
2700: private static void mapStr(String str, short code) {
2701: int j;
2702:
2703: for (int i = 0; i < str.length(); i++) {
2704: j = (int) str.charAt(i);
2705: lexmap[j] |= code;
2706: }
2707: }
2708:
2709: static {
2710: mapStr("\r\n\f", (short) (NEWLINE | WHITE));
2711: mapStr(" \t", WHITE);
2712: mapStr("-.:_", NAMECHAR);
2713: mapStr("0123456789", (short) (DIGIT | NAMECHAR));
2714: mapStr("abcdefghijklmnopqrstuvwxyz", (short) (LOWERCASE
2715: | LETTER | NAMECHAR));
2716: mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short) (UPPERCASE
2717: | LETTER | NAMECHAR));
2718: }
2719:
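/*
 return the character-class bits for c, or 0 for characters outside US-ASCII;
 for example (informal) (MAP('A') & UPPERCASE) != 0, (MAP(' ') & WHITE) != 0
 and MAP('>') == 0
*/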
2720: private static short MAP(char c) {
2721: return ((int) c < 128 ? lexmap[(int) c] : 0);
2722: }
2723:
2724: private static boolean isWhite(char c) {
2725: short m = MAP(c);
2726:
2727: return (m & WHITE) != 0;
2728: }
2729:
2730: private static boolean isDigit(char c) {
2731: short m;
2732:
2733: m = MAP(c);
2734:
2735: return (m & DIGIT) != 0;
2736: }
2737:
2738: private static boolean isLetter(char c) {
2739: short m;
2740:
2741: m = MAP(c);
2742:
2743: return (m & LETTER) != 0;
2744: }
2745:
2746: private static char toLower(char c) {
2747: short m = MAP(c);
2748:
2749: if ((m & UPPERCASE) != 0)
2750: c = (char) ((int) c + (int) 'a' - (int) 'A');
2751:
2752: return c;
2753: }
2754:
2755: private static char toUpper(char c) {
2756: short m = MAP(c);
2757:
2758: if ((m & LOWERCASE) != 0)
2759: c = (char) ((int) c + (int) 'A' - (int) 'a');
2760:
2761: return c;
2762: }
2763:
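/*
 fold c to upper or lower case (as selected by tocaps) for HTML output;
 a no-op when xmlTags is set, since XML element and attribute names are
 case sensitive
*/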
2764: public static char foldCase(char c, boolean tocaps, boolean xmlTags) {
2765: short m;
2766:
2767: if (!xmlTags) {
2768: m = MAP(c);
2769:
2770: if (tocaps) {
2771: if ((m & LOWERCASE) != 0)
2772: c = (char) ((int) c + (int) 'A' - (int) 'a');
2773: } else /* force to lower case */
2774: {
2775: if ((m & UPPERCASE) != 0)
2776: c = (char) ((int) c + (int) 'a' - (int) 'A');
2777: }
2778: }
2779:
2780: return c;
2781: }
2782:
2783: private static class W3CVersionInfo {
2784: String name;
2785: String voyagerName;
2786: String profile;
2787: short code;
2788:
2789: public W3CVersionInfo(String name, String voyagerName,
2790: String profile, short code) {
2791: this .name = name;
2792: this .voyagerName = voyagerName;
2793: this .profile = profile;
2794: this .code = code;
2795: }
2796: }
2797:
2798: /* the 3 URIs for the XHTML 1.0 DTDs */
2799: private static final String voyager_loose = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
2800: private static final String voyager_strict = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
2801: private static final String voyager_frameset = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd";
2802:
2803: private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
2804:
2805: private static Lexer.W3CVersionInfo[] W3CVersion = {
2806: new W3CVersionInfo("HTML 4.01", "XHTML 1.0 Strict",
2807: voyager_strict, Dict.VERS_HTML40_STRICT),
2808: new W3CVersionInfo("HTML 4.01 Transitional",
2809: "XHTML 1.0 Transitional", voyager_loose,
2810: Dict.VERS_HTML40_LOOSE),
2811: new W3CVersionInfo("HTML 4.01 Frameset",
2812: "XHTML 1.0 Frameset", voyager_frameset,
2813: Dict.VERS_FRAMES),
2814: new W3CVersionInfo("HTML 4.0", "XHTML 1.0 Strict",
2815: voyager_strict, Dict.VERS_HTML40_STRICT),
2816: new W3CVersionInfo("HTML 4.0 Transitional",
2817: "XHTML 1.0 Transitional", voyager_loose,
2818: Dict.VERS_HTML40_LOOSE),
2819: new W3CVersionInfo("HTML 4.0 Frameset",
2820: "XHTML 1.0 Frameset", voyager_frameset,
2821: Dict.VERS_FRAMES),
2822: new W3CVersionInfo("HTML 3.2", "XHTML 1.0 Transitional",
2823: voyager_loose, Dict.VERS_HTML32),
2824: new W3CVersionInfo("HTML 2.0", "XHTML 1.0 Strict",
2825: voyager_strict, Dict.VERS_HTML20) };
2826:
2827: }