Source Code Cross Referenced for ParserImpl.java in » Groupware » hipergate » org » w3c » tidy » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Groupware » hipergate » org.w3c.tidy
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         * @(#)ParserImpl.java   1.11 2000/08/16
0003:         *
0004:         */
0005:
0006:        package org.w3c.tidy;
0007:
0008:        /**
0009:         *
0010:         * HTML Parser implementation
0011:         *
0012:         * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
0013:         * See Tidy.java for the copyright notice.
0014:         * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
0015:         * HTML Tidy Release 4 Aug 2000</a>
0016:         *
0017:         * @author  Dave Raggett <dsr@w3.org>
0018:         * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
0019:         * @version 1.0, 1999/05/22
0020:         * @version 1.0.1, 1999/05/29
0021:         * @version 1.1, 1999/06/18 Java Bean
0022:         * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
0023:         * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
0024:         * @version 1.4, 1999/09/04 DOM support
0025:         * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
0026:         * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
0027:         * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
0028:         * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
0029:         * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
0030:         * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
0031:         * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
0032:         */
0033:
0034:        public class ParserImpl {
0035:
0036:            //private static int SeenBodyEndTag;  /* AQ: moved into lexer structure */
0037:
0038:            private static void parseTag(Lexer lexer, Node node, short mode) {
                     // Purpose: hand node over to the content parser attached to its tag
                     // (node.tag.parser), after adjusting the lexer's whitespace flags.
                     //   lexer - its insertspace / waswhite flags are reset here
                     //   node  - element whose content is to be parsed; node.tag is
                     //           dereferenced unconditionally, so it must not be null
                     //   mode  - passed through unchanged to the tag's parser
                     //
0039:                // Local fix by GLP 2000-12-21.  Need to reset insertspace if this 
0040:                // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
0041:                // Remove this code once the fix is made in Tidy.
0042:
0043:                /******  (Original code follows)
0044:                 if ((node.tag.model & Dict.CM_EMPTY) != 0)
0045:                 {
0046:                 lexer.waswhite = false;
0047:                 return;
0048:                 }
0049:                 else if (!((node.tag.model & Dict.CM_INLINE) != 0))
0050:                 lexer.insertspace = false;
0051:                 *******/
0052:
                     // Done before the CM_EMPTY early return below, so the reset also
                     // happens for tags that are both empty and non-inline.
0053:                if (!((node.tag.model & Dict.CM_INLINE) != 0))
0054:                    lexer.insertspace = false;
0055:
                     // Tags flagged CM_EMPTY are never handed to a content parser.
0056:                if ((node.tag.model & Dict.CM_EMPTY) != 0) {
0057:                    lexer.waswhite = false;
0058:                    return;
0059:                }
0060:
                     // Nothing to do when the tag has no parser registered or the token
                     // is of type StartEndTag.
0061:                if (node.tag.parser == null || node.type == Node.StartEndTag)
0062:                    return;
0063:
0064:                node.tag.parser.parse(lexer, node, mode);
0065:            }
0066:
0067:            private static void moveToHead(Lexer lexer, Node element, Node node) {
                     // Purpose: relocate a start tag that was found outside the head into
                     // the document's head element, then parse its content.
                     //   lexer   - provides the tag table and receives the warnings
                     //   element - the element in which node was encountered; used as the
                     //             starting point for the walk up to the html element
                     //   node    - the misplaced token
                     // Tokens that are not StartTag/StartEndTag are only reported as
                     // discarded; nothing is moved.
0068:                Node head;
0069:                TagTable tt = lexer.configuration.tt;
0070:
0071:                if (node.type == Node.StartTag || node.type == Node.StartEndTag) {
0072:                    Report.warning(lexer, element, node,
0073:                            Report.TAG_NOT_ALLOWED_IN);
0074:
                         // Climb the parent chain to the html element.
                         // NOTE(review): there is no null guard, so this assumes element
                         // always has an html ancestor - confirm against callers.
0075:                    while (element.tag != tt.tagHtml)
0076:                        element = element.parent;
0077:
                         // Append node to the first head child of html.
0078:                    for (head = element.content; head != null; head = head.next) {
0079:                        if (head.tag == tt.tagHead) {
0080:                            Node.insertNodeAtEnd(head, node);
0081:                            break;
0082:                        }
0083:                    }
0084:
                         // NOTE(review): this runs even when no head child was found above,
                         // in which case node is parsed without having been inserted.
0085:                    if (node.tag.parser != null)
0086:                        parseTag(lexer, node, Lexer.IgnoreWhitespace);
0087:                } else {
0088:                    Report.warning(lexer, element, node,
0089:                            Report.DISCARDING_UNEXPECTED);
0090:                }
0091:            }
0092:
0093:            public static class ParseHTML implements  Parser {
0094:
                     /**
                      * Parses the children of the html element in two phases.
                      *
                      * Phase 1 obtains a head element (taken from the input or inferred),
                      * appends it to html and parses it. Phase 2 reads tokens until it
                      * can decide what follows the head: a body (real or inferred), or a
                      * frameset with an optional noframes child. When the loop ends with
                      * a body, that body is appended to html and parsed.
                      *
                      * Resets lexer.configuration.XmlTags and lexer.seenBodyEndTag on entry.
                      *
                      * @param lexer token source and configuration holder
                      * @param html  the html element being filled
                      * @param mode  whitespace mode; forwarded to the head, frameset,
                      *              noframes and body parsers, while tokens are always
                      *              fetched here with Lexer.IgnoreWhitespace
                      */
0095:                public void parse(Lexer lexer, Node html, short mode) {
0096:                    Node node, head;
0097:                    Node frameset = null;
0098:                    Node noframes = null;
0099:
0100:                    lexer.configuration.XmlTags = false;
0101:                    lexer.seenBodyEndTag = 0;
0102:                    TagTable tt = lexer.configuration.tt;
0103:
                         // Phase 1: find the head token, or infer one.
0104:                    for (;;) {
0105:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
0106:
                             // End of input before any head: use an inferred head.
0107:                        if (node == null) {
0108:                            node = lexer.inferredTag("head");
0109:                            break;
0110:                        }
0111:
0112:                        if (node.tag == tt.tagHead)
0113:                            break;
0114:
                             // A stray </html> before the head is dropped with a warning.
0115:                        if (node.tag == html.tag && node.type == Node.EndTag) {
0116:                            Report.warning(lexer, html, node,
0117:                                    Report.DISCARDING_UNEXPECTED);
0118:                            continue;
0119:                        }
0120:
0121:                        /* deal with comments etc. */
0122:                        if (Node.insertMisc(html, node))
0123:                            continue;
0124:
                             // Any other token is pushed back so that the head parser
                             // sees it, and the head itself is inferred.
0125:                        lexer.ungetToken();
0126:                        node = lexer.inferredTag("head");
0127:                        break;
0128:                    }
0129:
0130:                    head = node;
0131:                    Node.insertNodeAtEnd(html, head);
                         // getParseHead() is declared outside this excerpt; presumably it
                         // returns the ParseHead parser - confirm.
0132:                    getParseHead().parse(lexer, head, mode);
0133:
                         // Phase 2: decide what follows the head.
0134:                    for (;;) {
0135:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
0136:
                             // NOTE(review): the inferred body is assigned to the local
                             // variable only; the method returns without inserting it
                             // into html, so no body element is actually created here.
                             // Looks unintended given the "create an empty body"
                             // comment - confirm against upstream Tidy.
0137:                        if (node == null) {
0138:                            if (frameset == null) /* create an empty body */
0139:                                node = lexer.inferredTag("body");
0140:
0141:                            return;
0142:                        }
0143:
0144:                        /* robustly handle html tags */
                             // Every html token is skipped; a warning is issued only for
                             // non-start tokens seen before any frameset.
0145:                        if (node.tag == html.tag) {
0146:                            if (node.type != Node.StartTag && frameset == null)
0147:                                Report.warning(lexer, html, node,
0148:                                        Report.DISCARDING_UNEXPECTED);
0149:
0150:                            continue;
0151:                        }
0152:
0153:                        /* deal with comments etc. */
0154:                        if (Node.insertMisc(html, node))
0155:                            continue;
0156:
0157:                        /* if frameset document coerce <body> to <noframes> */
0158:                        if (node.tag == tt.tagBody) {
0159:                            if (node.type != Node.StartTag) {
0160:                                Report.warning(lexer, html, node,
0161:                                        Report.DISCARDING_UNEXPECTED);
0162:                                continue;
0163:                            }
0164:
0165:                            if (frameset != null) {
                                     // Push the <body> token back so that it is read
                                     // again by the noframes parser invoked below.
0166:                                lexer.ungetToken();
0167:
0168:                                if (noframes == null) {
0169:                                    noframes = lexer.inferredTag("noframes");
0170:                                    Node.insertNodeAtEnd(frameset, noframes);
0171:                                    Report.warning(lexer, html, noframes,
0172:                                            Report.INSERTING_TAG);
0173:                                }
0174:
0175:                                parseTag(lexer, noframes, mode);
0176:                                continue;
0177:                            }
0178:
0179:                            break; /* to parse body */
0180:                        }
0181:
0182:                        /* flag an error if we see more than one frameset */
0183:                        if (node.tag == tt.tagFrameset) {
0184:                            if (node.type != Node.StartTag) {
0185:                                Report.warning(lexer, html, node,
0186:                                        Report.DISCARDING_UNEXPECTED);
0187:                                continue;
0188:                            }
0189:
                                 // Only the first frameset is remembered; a duplicate is
                                 // reported but still inserted and parsed below.
0190:                            if (frameset != null)
0191:                                Report.error(lexer, html, node,
0192:                                        Report.DUPLICATE_FRAMESET);
0193:                            else
0194:                                frameset = node;
0195:
0196:                            Node.insertNodeAtEnd(html, node);
0197:                            parseTag(lexer, node, mode);
0198:
0199:                            /*
0200:                              see if it includes a noframes element so
0201:                              that we can merge subsequent noframes elements
0202:                             */
0203:
                                 // node is reused as the loop cursor; it is reassigned by
                                 // getToken at the top of the next iteration. The scan
                                 // covers the remembered (first) frameset's children and
                                 // keeps the last noframes child it finds.
0204:                            for (node = frameset.content; node != null; node = node.next) {
0205:                                if (node.tag == tt.tagNoframes)
0206:                                    noframes = node;
0207:                            }
0208:                            continue;
0209:                        }
0210:
0211:                        /* if not a frameset document coerce <noframes> to <body> */
0212:                        if (node.tag == tt.tagNoframes) {
0213:                            if (node.type != Node.StartTag) {
0214:                                Report.warning(lexer, html, node,
0215:                                        Report.DISCARDING_UNEXPECTED);
0216:                                continue;
0217:                            }
0218:
                                 // No frameset seen: the noframes token is dropped and an
                                 // inferred body takes its place.
0219:                            if (frameset == null) {
0220:                                Report.warning(lexer, html, node,
0221:                                        Report.DISCARDING_UNEXPECTED);
0222:                                node = lexer.inferredTag("body");
0223:                                break;
0224:                            }
0225:
                                 // With an existing noframes element the new token is not
                                 // inserted; parsing continues into the existing element.
0226:                            if (noframes == null) {
0227:                                noframes = node;
0228:                                Node.insertNodeAtEnd(frameset, noframes);
0229:                            }
0230:
0231:                            parseTag(lexer, noframes, mode);
0232:                            continue;
0233:                        }
0234:
                             // Start tags whose model includes CM_HEAD are relocated into
                             // the head rather than starting the body.
0235:                        if (node.type == Node.StartTag
0236:                                || node.type == Node.StartEndTag) {
0237:                            if (node.tag != null
0238:                                    && (node.tag.model & Dict.CM_HEAD) != 0) {
0239:                                moveToHead(lexer, html, node);
0240:                                continue;
0241:                            }
0242:                        }
0243:
                             // Everything else is content: push it back for the noframes
                             // or body parser invoked below.
0244:                        lexer.ungetToken();
0245:
0246:                        /* insert other content into noframes element */
0247:
0248:                        if (frameset != null) {
                                 // The NOFRAMES_CONTENT warning is issued only when a
                                 // noframes element already exists.
0249:                            if (noframes == null) {
0250:                                noframes = lexer.inferredTag("noframes");
0251:                                Node.insertNodeAtEnd(frameset, noframes);
0252:                            } else
0253:                                Report.warning(lexer, html, node,
0254:                                        Report.NOFRAMES_CONTENT);
0255:
0256:                            parseTag(lexer, noframes, mode);
0257:                            continue;
0258:                        }
0259:
0260:                        node = lexer.inferredTag("body");
0261:                        break;
0262:                    }
0263:
0264:                    /* node must be body */
0265:
0266:                    Node.insertNodeAtEnd(html, node);
0267:                    parseTag(lexer, node, mode);
0268:                }
0269:
0270:            };
0271:
0272:            public static class ParseHead implements  Parser {
0273:
                     /**
                      * Parses the children of the head element.
                      *
                      * Tokens are consumed until end of input, the matching end tag, a
                      * text node, or a tag whose model lacks CM_HEAD; the last two are
                      * pushed back for the caller. Start tags are appended to head and
                      * parsed with Lexer.IgnoreWhitespace. More than one title or base
                      * element triggers a TOO_MANY_ELEMENTS warning. If no title was
                      * seen, a warning is issued and an inferred title is appended.
                      *
                      * @param lexer token source and tag table holder
                      * @param head  the head element being filled
                      * @param mode  not used by this parser
                      */
0274:                public void parse(Lexer lexer, Node head, short mode) {
0275:                    Node node;
                         // Occurrence counters for title and base start tags.
0276:                    int HasTitle = 0;
0277:                    int HasBase = 0;
0278:                    TagTable tt = lexer.configuration.tt;
0279:
0280:                    while (true) {
0281:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
0282:                        if (node == null)
0283:                            break;
0284:                        if (node.tag == head.tag && node.type == Node.EndTag) {
0285:                            head.closed = true;
0286:                            break;
0287:                        }
0288:
                             // Text ends the head; hand it back to the caller.
0289:                        if (node.type == Node.TextNode) {
0290:                            lexer.ungetToken();
0291:                            break;
0292:                        }
0293:
0294:                        /* deal with comments etc. */
0295:                        if (Node.insertMisc(head, node))
0296:                            continue;
0297:
0298:                        if (node.type == Node.DocTypeTag) {
0299:                            Node.insertDocType(lexer, head, node);
0300:                            continue;
0301:                        }
0302:
0303:                        /* discard unknown tags */
0304:                        if (node.tag == null) {
0305:                            Report.warning(lexer, head, node,
0306:                                    Report.DISCARDING_UNEXPECTED);
0307:                            continue;
0308:                        }
0309:
                             // A tag without CM_HEAD ends the head; push it back.
0310:                        if (!((node.tag.model & Dict.CM_HEAD) != 0)) {
0311:                            lexer.ungetToken();
0312:                            break;
0313:                        }
0314:
0315:                        if (node.type == Node.StartTag
0316:                                || node.type == Node.StartEndTag) {
                                 // The warnings below do not stop the element from being
                                 // inserted and parsed.
0317:                            if (node.tag == tt.tagTitle) {
0318:                                ++HasTitle;
0319:
0320:                                if (HasTitle > 1)
0321:                                    Report.warning(lexer, head, node,
0322:                                            Report.TOO_MANY_ELEMENTS);
0323:                            } else if (node.tag == tt.tagBase) {
0324:                                ++HasBase;
0325:
0326:                                if (HasBase > 1)
0327:                                    Report.warning(lexer, head, node,
0328:                                            Report.TOO_MANY_ELEMENTS);
0329:                            } else if (node.tag == tt.tagNoscript)
0330:                                Report.warning(lexer, head, node,
0331:                                        Report.TAG_NOT_ALLOWED_IN);
0332:
0333:                            Node.insertNodeAtEnd(head, node);
0334:                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
0335:                            continue;
0336:                        }
0337:
0338:                        /* discard unexpected text nodes and end tags */
                             // Text nodes already left the loop above, so what reaches
                             // this point is a non-start token for a CM_HEAD tag.
0339:                        Report.warning(lexer, head, node,
0340:                                Report.DISCARDING_UNEXPECTED);
0341:                    }
0342:
0343:                    if (HasTitle == 0) {
0344:                        Report.warning(lexer, head, null,
0345:                                Report.MISSING_TITLE_ELEMENT);
0346:                        Node.insertNodeAtEnd(head, lexer.inferredTag("title"));
0347:                    }
0348:                }
0349:
0350:            };
0351:
0352:            public static class ParseTitle implements  Parser {
0353:
                     /**
                      * Parses the content of a title element.
                      *
                      * Text nodes are appended to title; comments and similar items go
                      * through Node.insertMisc; tokens with an unknown tag are dropped
                      * with a warning. Any other token means the end tag is missing:
                      * it is pushed back and parsing stops. Reaching end of input
                      * yields a MISSING_ENDTAG_FOR warning.
                      *
                      * @param lexer token source
                      * @param title the title element being filled
                      * @param mode  not used; tokens are read with Lexer.MixedContent
                      */
0354:                public void parse(Lexer lexer, Node title, short mode) {
0355:                    Node node;
0356:
0357:                    while (true) {
0358:                        node = lexer.getToken(Lexer.MixedContent);
0359:                        if (node == null)
0360:                            break;
0361:                        if (node.tag == title.tag && node.type == Node.EndTag) {
0362:                            title.closed = true;
0363:                            Node.trimSpaces(lexer, title);
0364:                            return;
0365:                        }
0366:
0367:                        if (node.type == Node.TextNode) {
0368:                            /* only called for 1st child */
0369:                            if (title.content == null)
0370:                                Node.trimInitialSpace(lexer, title, node);
0371:
                                 // Skip text whose range is empty (start >= end), e.g.
                                 // presumably after the trim above removed everything.
0372:                            if (node.start >= node.end) {
0373:                                continue;
0374:                            }
0375:
0376:                            Node.insertNodeAtEnd(title, node);
0377:                            continue;
0378:                        }
0379:
0380:                        /* deal with comments etc. */
0381:                        if (Node.insertMisc(title, node))
0382:                            continue;
0383:
0384:                        /* discard unknown tags */
0385:                        if (node.tag == null) {
0386:                            Report.warning(lexer, title, node,
0387:                                    Report.DISCARDING_UNEXPECTED);
0388:                            continue;
0389:                        }
0390:
0391:                        /* pushback unexpected tokens */
0392:                        Report.warning(lexer, title, node,
0393:                                Report.MISSING_ENDTAG_BEFORE);
0394:                        lexer.ungetToken();
0395:                        Node.trimSpaces(lexer, title);
0396:                        return;
0397:                    }
0398:
                         // Only reached at end of input, so node is null here.
0399:                    Report.warning(lexer, title, node,
0400:                            Report.MISSING_ENDTAG_FOR);
0401:                }
0402:
0403:            };
0404:
0405:            public static class ParseScript implements  Parser {
0406:
                     /**
                      * Parses the content of a script element by asking the lexer for
                      * a single CDATA node and appending it to script when one is
                      * returned.
                      *
                      * @param lexer  source of the CDATA node
                      * @param script the element receiving the node
                      * @param mode   not used by this parser
                      */
0407:                public void parse(Lexer lexer, Node script, short mode) {
0408:                    /*
0409:                      This isn't quite right for CDATA content as it recognises
0410:                      tags within the content and parses them accordingly.
0411:                      This will unfortunately screw up scripts which include
0412:                      < + letter,  < + !, < + ?  or  < + / + letter
0413:                     */
0414:
0415:                    Node node;
0416:
0417:                    node = lexer.getCDATA(script);
0418:
                         // A null result leaves script without content.
0419:                    if (node != null)
0420:                        Node.insertNodeAtEnd(script, node);
0421:                }
0422:
0423:            };
0424:
0425:            public static class ParseBody implements  Parser {
0426:
                     /**
                      * Parses the children of a body element.
                      *
                      * Tokens are read until end of input, or until a token that this
                      * parser does not own is pushed back for the caller. Parsing
                      * continues past the body end tag, except when body's parent is
                      * a noframes element, so that trailing content is still
                      * collected; the first such non-blank token is reported as
                      * CONTENT_AFTER_BODY.
                      *
                      * @param lexer token source, configuration and version flags
                      * @param body  the body element being filled
                      * @param mode  incoming value is ignored: it is overwritten with
                      *              Lexer.IgnoreWhitespace and afterwards switches
                      *              between IgnoreWhitespace and MixedContent
                      */
0427:                public void parse(Lexer lexer, Node body, short mode) {
0428:                    Node node;
                         // checkstack: while true, the next text node or inline start
                         // tag is first offered to lexer.inlineDup.
                         // iswhitenode: the current token is a text node that passed the
                         // single-space test below.
0429:                    boolean checkstack, iswhitenode;
0430:
0431:                    mode = Lexer.IgnoreWhitespace;
0432:                    checkstack = true;
0433:                    TagTable tt = lexer.configuration.tt;
0434:
0435:                    while (true) {
0436:                        node = lexer.getToken(mode);
0437:                        if (node == null)
0438:                            break;
                             // End tag of body: mark it closed and remember that it was
                             // seen. The loop goes on reading unless this body sits
                             // inside a noframes element.
0439:                        if (node.tag == body.tag && node.type == Node.EndTag) {
0440:                            body.closed = true;
0441:                            Node.trimSpaces(lexer, body);
0442:                            lexer.seenBodyEndTag = 1;
0443:                            mode = Lexer.IgnoreWhitespace;
0444:
0445:                            if (body.parent.tag == tt.tagNoframes)
0446:                                break;
0447:
0448:                            continue;
0449:                        }
0450:
0451:                        if (node.tag == tt.tagNoframes) {
                                 // A noframes start tag becomes a child of body and is
                                 // parsed with the parser returned by getParseBlock()
                                 // (declared outside this excerpt).
0452:                            if (node.type == Node.StartTag) {
0453:                                Node.insertNodeAtEnd(body, node);
0454:                                getParseBlock().parse(lexer, node, mode);
0455:                                continue;
0456:                            }
0457:
                                 // </noframes> closing the enclosing noframes: hand it
                                 // back to the caller and stop.
0458:                            if (node.type == Node.EndTag
0459:                                    && body.parent.tag == tt.tagNoframes) {
0460:                                Node.trimSpaces(lexer, body);
0461:                                lexer.ungetToken();
0462:                                break;
0463:                            }
0464:                        }
0465:
                             // frame/frameset tokens inside a noframes body belong to
                             // the caller: push back and stop.
0466:                        if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)
0467:                                && body.parent.tag == tt.tagNoframes) {
0468:                            Node.trimSpaces(lexer, body);
0469:                            lexer.ungetToken();
0470:                            break;
0471:                        }
0472:
                             // html tokens are always skipped; only start forms warn.
0473:                        if (node.tag == tt.tagHtml) {
0474:                            if (node.type == Node.StartTag
0475:                                    || node.type == Node.StartEndTag)
0476:                                Report.warning(lexer, body, node,
0477:                                        Report.DISCARDING_UNEXPECTED);
0478:
0479:                            continue;
0480:                        }
0481:
0482:                        iswhitenode = false;
0483:
                             // White node = text at most one unit long whose first byte
                             // is a space.
                             // NOTE(review): the length test also admits end == start,
                             // in which case textarray[node.start] lies outside the
                             // node's own range - confirm the lexer never produces
                             // empty text nodes here.
0484:                        if (node.type == Node.TextNode
0485:                                && node.end <= node.start + 1
0486:                                && node.textarray[node.start] == (byte) ' ')
0487:                            iswhitenode = true;
0488:
0489:                        /* deal with comments etc. */
0490:                        if (Node.insertMisc(body, node))
0491:                            continue;
0492:
                             // Warn once about content after </body>: the counter moves
                             // from 1 to 2, so later tokens no longer match.
0493:                        if (lexer.seenBodyEndTag == 1 && !iswhitenode) {
0494:                            ++lexer.seenBodyEndTag;
0495:                            Report.warning(lexer, body, node,
0496:                                    Report.CONTENT_AFTER_BODY);
0497:                        }
0498:
0499:                        /* mixed content model permits text */
0500:                        if (node.type == Node.TextNode) {
0501:                            if (iswhitenode && mode == Lexer.IgnoreWhitespace) {
0502:                                continue;
0503:                            }
0504:
                                 // With EncloseBodyText set, non-blank text is pushed
                                 // back and re-read as the content of an inferred <p>.
0505:                            if (lexer.configuration.EncloseBodyText
0506:                                    && !iswhitenode) {
0507:                                Node para;
0508:
0509:                                lexer.ungetToken();
0510:                                para = lexer.inferredTag("p");
0511:                                Node.insertNodeAtEnd(body, para);
0512:                                parseTag(lexer, para, mode);
0513:                                mode = Lexer.MixedContent;
0514:                                continue;
0515:                            } else
0516:                                /* strict doesn't allow text here */
0517:                                lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
0518:
                                 // When inlineDup reports a positive count the text node
                                 // is not inserted in this iteration; presumably the
                                 // lexer has re-queued it behind duplicated inline tags -
                                 // confirm in Lexer.inlineDup.
0519:                            if (checkstack) {
0520:                                checkstack = false;
0521:
0522:                                if (lexer.inlineDup(node) > 0)
0523:                                    continue;
0524:                            }
0525:
0526:                            Node.insertNodeAtEnd(body, node);
0527:                            mode = Lexer.MixedContent;
0528:                            continue;
0529:                        }
0530:
0531:                        if (node.type == Node.DocTypeTag) {
0532:                            Node.insertDocType(lexer, body, node);
0533:                            continue;
0534:                        }
0535:                        /* discard unknown  and PARAM tags */
0536:                        if (node.tag == null || node.tag == tt.tagParam) {
0537:                            Report.warning(lexer, body, node,
0538:                                    Report.DISCARDING_UNEXPECTED);
0539:                            continue;
0540:                        }
0541:
0542:                        /*
0543:                          Netscape allows LI and DD directly in BODY
0544:                          We infer UL or DL respectively and use this
0545:                          boolean to exclude block-level elements so as
0546:                          to match Netscape's observed behaviour.
0547:                         */
0548:                        lexer.excludeBlocks = false;
0549:
                             // Tags that are neither block-level nor inline need special
                             // placement.
0550:                        if (!((node.tag.model & Dict.CM_BLOCK) != 0)
0551:                                && !((node.tag.model & Dict.CM_INLINE) != 0)) {
0552:                            /* avoid this error message being issued twice */
0553:                            if (!((node.tag.model & Dict.CM_HEAD) != 0))
0554:                                Report.warning(lexer, body, node,
0555:                                        Report.TAG_NOT_ALLOWED_IN);
0556:
                                 // CM_HTML tags are dropped here; a nested <body> may
                                 // first donate its attributes to an inferred body.
0557:                            if ((node.tag.model & Dict.CM_HTML) != 0) {
0558:                                /* copy body attributes if current body was inferred */
0559:                                if (node.tag == tt.tagBody && body.implicit
0560:                                        && body.attributes == null) {
0561:                                    body.attributes = node.attributes;
0562:                                    node.attributes = null;
0563:                                }
0564:
0565:                                continue;
0566:                            }
0567:
0568:                            if ((node.tag.model & Dict.CM_HEAD) != 0) {
0569:                                moveToHead(lexer, body, node);
0570:                                continue;
0571:                            }
0572:
                                 // List, definition-list and table parts get an inferred
                                 // container: the token is pushed back and node is
                                 // replaced by the container, which then continues to
                                 // the end-tag / start-tag handling below.
0573:                            if ((node.tag.model & Dict.CM_LIST) != 0) {
0574:                                lexer.ungetToken();
0575:                                node = lexer.inferredTag("ul");
0576:                                Node.addClass(node, "noindent");
0577:                                lexer.excludeBlocks = true;
0578:                            } else if ((node.tag.model & Dict.CM_DEFLIST) != 0) {
0579:                                lexer.ungetToken();
0580:                                node = lexer.inferredTag("dl");
0581:                                lexer.excludeBlocks = true;
0582:                            } else if ((node.tag.model & (Dict.CM_TABLE
0583:                                    | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0) {
0584:                                lexer.ungetToken();
0585:                                node = lexer.inferredTag("table");
0586:                                lexer.excludeBlocks = true;
0587:                            } else {
0588:                                /* AQ: The following line is from the official C
0589:                                   version of tidy.  It doesn't make sense to me
0590:                                   because the '!' operator has higher precedence
0591:                                   than the '&' operator.  It seems to me that the
0592:                                   expression always evaluates to 0.
0593:
0594:                                   if (!node->tag->model & (CM_ROW | CM_FIELD))
0595:
0596:                                   AQ: 13Jan2000 fixed in C tidy
0597:                                 */
                                     // Anything that is not a row or field tag is handed
                                     // back to the caller and body parsing ends.
0598:                                if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0)) {
0599:                                    lexer.ungetToken();
0600:                                    return;
0601:                                }
0602:
0603:                                /* ignore </td> </th> <option> etc. */
0604:                                continue;
0605:                            }
0606:                        }
0607:
                             // End tags: </br> is turned into a <br> start tag; </p> is
                             // coerced to br and inserted, then node is replaced by a
                             // second, inferred br; an inline end tag is passed to
                             // lexer.popInline.
0608:                        if (node.type == Node.EndTag) {
0609:                            if (node.tag == tt.tagBr)
0610:                                node.type = Node.StartTag;
0611:                            else if (node.tag == tt.tagP) {
0612:                                Node.coerceNode(lexer, node, tt.tagBr);
0613:                                Node.insertNodeAtEnd(body, node);
0614:                                node = lexer.inferredTag("br");
0615:                            } else if ((node.tag.model & Dict.CM_INLINE) != 0)
0616:                                lexer.popInline(node);
0617:                        }
0618:
0619:                        if (node.type == Node.StartTag
0620:                                || node.type == Node.StartEndTag) {
0621:                            if (((node.tag.model & Dict.CM_INLINE) != 0)
0622:                                    && !((node.tag.model & Dict.CM_MIXED) != 0)) {
0623:                                /* HTML4 strict doesn't allow inline content here */
0624:                                /* but HTML2 does allow img elements as children of body */
0625:                                if (node.tag == tt.tagImg)
0626:                                    lexer.versions &= ~Dict.VERS_HTML40_STRICT;
0627:                                else
0628:                                    lexer.versions &= ~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20);
0629:
                                     // Same inlineDup handling as for text nodes above,
                                     // skipped for implicit nodes.
0630:                                if (checkstack && !node.implicit) {
0631:                                    checkstack = false;
0632:
0633:                                    if (lexer.inlineDup(node) > 0)
0634:                                        continue;
0635:                                }
0636:
0637:                                mode = Lexer.MixedContent;
0638:                            } else {
                                     // Any other start tag re-enables the inlineDup check
                                     // and switches back to ignoring whitespace.
0639:                                checkstack = true;
0640:                                mode = Lexer.IgnoreWhitespace;
0641:                            }
0642:
0643:                            if (node.implicit)
0644:                                Report.warning(lexer, body, node,
0645:                                        Report.INSERTING_TAG);
0646:
0647:                            Node.insertNodeAtEnd(body, node);
0648:                            parseTag(lexer, node, mode);
0649:                            continue;
0650:                        }
0651:
0652:                        /* discard unexpected tags */
0653:                        Report.warning(lexer, body, node,
0654:                                Report.DISCARDING_UNEXPECTED);
0655:                    }
0656:                }
0657:
0658:            };
0659:
0660:            public static class ParseFrameSet implements  Parser {
0661:
0662:                public void parse(Lexer lexer, Node frameset, short mode) {
0663:                    Node node;
0664:                    TagTable tt = lexer.configuration.tt;
0665:
0666:                    lexer.badAccess |= Report.USING_FRAMES;
0667:
0668:                    while (true) {
0669:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
0670:                        if (node == null)
0671:                            break;
0672:                        if (node.tag == frameset.tag
0673:                                && node.type == Node.EndTag) {
0674:                            frameset.closed = true;
0675:                            Node.trimSpaces(lexer, frameset);
0676:                            return;
0677:                        }
0678:
0679:                        /* deal with comments etc. */
0680:                        if (Node.insertMisc(frameset, node))
0681:                            continue;
0682:
0683:                        if (node.tag == null) {
0684:                            Report.warning(lexer, frameset, node,
0685:                                    Report.DISCARDING_UNEXPECTED);
0686:                            continue;
0687:                        }
0688:
0689:                        if (node.type == Node.StartTag
0690:                                || node.type == Node.StartEndTag) {
0691:                            if (node.tag != null
0692:                                    && (node.tag.model & Dict.CM_HEAD) != 0) {
0693:                                moveToHead(lexer, frameset, node);
0694:                                continue;
0695:                            }
0696:                        }
0697:
0698:                        if (node.tag == tt.tagBody) {
0699:                            lexer.ungetToken();
0700:                            node = lexer.inferredTag("noframes");
0701:                            Report.warning(lexer, frameset, node,
0702:                                    Report.INSERTING_TAG);
0703:                        }
0704:
0705:                        if (node.type == Node.StartTag
0706:                                && (node.tag.model & Dict.CM_FRAMES) != 0) {
0707:                            Node.insertNodeAtEnd(frameset, node);
0708:                            lexer.excludeBlocks = false;
0709:                            parseTag(lexer, node, Lexer.MixedContent);
0710:                            continue;
0711:                        } else if (node.type == Node.StartEndTag
0712:                                && (node.tag.model & Dict.CM_FRAMES) != 0) {
0713:                            Node.insertNodeAtEnd(frameset, node);
0714:                            continue;
0715:                        }
0716:
0717:                        /* discard unexpected tags */
0718:                        Report.warning(lexer, frameset, node,
0719:                                Report.DISCARDING_UNEXPECTED);
0720:                    }
0721:
0722:                    Report.warning(lexer, frameset, node,
0723:                            Report.MISSING_ENDTAG_FOR);
0724:                }
0725:
0726:            };
0727:
0728:            public static class ParseInline implements  Parser {
0729:
0730:                public void parse(Lexer lexer, Node element, short mode) {
0731:                    Node node, parent;
0732:                    TagTable tt = lexer.configuration.tt;
0733:
0734:                    if ((element.tag.model & Dict.CM_EMPTY) != 0)
0735:                        return;
0736:
0737:                    if (element.tag == tt.tagA) {
0738:                        if (element.attributes == null) {
0739:                            Report.warning(lexer, element.parent, element,
0740:                                    Report.DISCARDING_UNEXPECTED);
0741:                            Node.discardElement(element);
0742:                            return;
0743:                        }
0744:                    }
0745:
0746:                    /*
0747:                     ParseInline is used for some block level elements like H1 to H6
0748:                     For such elements we need to insert inline emphasis tags currently
0749:                     on the inline stack. For Inline elements, we normally push them
0750:                     onto the inline stack provided they aren't implicit or OBJECT/APPLET.
0751:                     This test is carried out in PushInline and PopInline, see istack.c
0752:                     We don't push A or SPAN to replicate current browser behavior
0753:                     */
0754:                    if (((element.tag.model & Dict.CM_BLOCK) != 0)
0755:                            || (element.tag == tt.tagDt))
0756:                        lexer.inlineDup(null);
0757:                    else if ((element.tag.model & Dict.CM_INLINE) != 0
0758:                            && element.tag != tt.tagA
0759:                            && element.tag != tt.tagSpan)
0760:                        lexer.pushInline(element);
0761:
0762:                    if (element.tag == tt.tagNobr)
0763:                        lexer.badLayout |= Report.USING_NOBR;
0764:                    else if (element.tag == tt.tagFont)
0765:                        lexer.badLayout |= Report.USING_FONT;
0766:
0767:                    /* Inline elements may or may not be within a preformatted element */
0768:                    if (mode != Lexer.Preformatted)
0769:                        mode = Lexer.MixedContent;
0770:
0771:                    while (true) {
0772:                        node = lexer.getToken(mode);
0773:                        if (node == null)
0774:                            break;
0775:                        /* end tag for current element */
0776:                        if (node.tag == element.tag && node.type == Node.EndTag) {
0777:                            if ((element.tag.model & Dict.CM_INLINE) != 0
0778:                                    && element.tag != tt.tagA)
0779:                                lexer.popInline(node);
0780:
0781:                            if (!((mode & Lexer.Preformatted) != 0))
0782:                                Node.trimSpaces(lexer, element);
0783:                            /*
0784:                             if a font element wraps an anchor and nothing else
0785:                             then move the font element inside the anchor since
0786:                             otherwise it won't alter the anchor text color
0787:                             */
0788:                            if (element.tag == tt.tagFont
0789:                                    && element.content != null
0790:                                    && element.content == element.last) {
0791:                                Node child = element.content;
0792:
0793:                                if (child.tag == tt.tagA) {
0794:                                    child.parent = element.parent;
0795:                                    child.next = element.next;
0796:                                    child.prev = element.prev;
0797:
0798:                                    if (child.prev != null)
0799:                                        child.prev.next = child;
0800:                                    else
0801:                                        child.parent.content = child;
0802:
0803:                                    if (child.next != null)
0804:                                        child.next.prev = child;
0805:                                    else
0806:                                        child.parent.last = child;
0807:
0808:                                    element.next = null;
0809:                                    element.prev = null;
0810:                                    element.parent = child;
0811:                                    element.content = child.content;
0812:                                    element.last = child.last;
0813:                                    child.content = element;
0814:                                    child.last = element;
0815:                                    for (child = element.content; child != null; child = child.next)
0816:                                        child.parent = element;
0817:                                }
0818:                            }
0819:                            element.closed = true;
0820:                            Node.trimSpaces(lexer, element);
0821:                            Node.trimEmptyElement(lexer, element);
0822:                            return;
0823:                        }
0824:
0825:                        /* <u>...<u>  map 2nd <u> to </u> if 1st is explicit */
0826:                        /* otherwise emphasis nesting is probably unintentional */
0827:                        /* big and small have cumulative effect to leave them alone */
0828:                        if (node.type == Node.StartTag
0829:                                && node.tag == element.tag
0830:                                && lexer.isPushed(node) && !node.implicit
0831:                                && !element.implicit && node.tag != null
0832:                                && ((node.tag.model & Dict.CM_INLINE) != 0)
0833:                                && node.tag != tt.tagA
0834:                                && node.tag != tt.tagFont
0835:                                && node.tag != tt.tagBig
0836:                                && node.tag != tt.tagSmall) {
0837:                            if (element.content != null
0838:                                    && node.attributes == null) {
0839:                                Report.warning(lexer, element, node,
0840:                                        Report.COERCE_TO_ENDTAG);
0841:                                node.type = Node.EndTag;
0842:                                lexer.ungetToken();
0843:                                continue;
0844:                            }
0845:
0846:                            Report.warning(lexer, element, node,
0847:                                    Report.NESTED_EMPHASIS);
0848:                        }
0849:
0850:                        if (node.type == Node.TextNode) {
0851:                            /* only called for 1st child */
0852:                            if (element.content == null
0853:                                    && !((mode & Lexer.Preformatted) != 0))
0854:                                Node.trimSpaces(lexer, element);
0855:
0856:                            if (node.start >= node.end) {
0857:                                continue;
0858:                            }
0859:
0860:                            Node.insertNodeAtEnd(element, node);
0861:                            continue;
0862:                        }
0863:
0864:                        /* mixed content model so allow text */
0865:                        if (Node.insertMisc(element, node))
0866:                            continue;
0867:
0868:                        /* deal with HTML tags */
0869:                        if (node.tag == tt.tagHtml) {
0870:                            if (node.type == Node.StartTag
0871:                                    || node.type == Node.StartEndTag) {
0872:                                Report.warning(lexer, element, node,
0873:                                        Report.DISCARDING_UNEXPECTED);
0874:                                continue;
0875:                            }
0876:
0877:                            /* otherwise infer end of inline element */
0878:                            lexer.ungetToken();
0879:                            if (!((mode & Lexer.Preformatted) != 0))
0880:                                Node.trimSpaces(lexer, element);
0881:                            Node.trimEmptyElement(lexer, element);
0882:                            return;
0883:                        }
0884:
0885:                        /* within <dt> or <pre> map <p> to <br> */
0886:                        if (node.tag == tt.tagP
0887:                                && node.type == Node.StartTag
0888:                                && ((mode & Lexer.Preformatted) != 0
0889:                                        || element.tag == tt.tagDt || element
0890:                                        .isDescendantOf(tt.tagDt))) {
0891:                            node.tag = tt.tagBr;
0892:                            node.element = "br";
0893:                            Node.trimSpaces(lexer, element);
0894:                            Node.insertNodeAtEnd(element, node);
0895:                            continue;
0896:                        }
0897:
0898:                        /* ignore unknown and PARAM tags */
0899:                        if (node.tag == null || node.tag == tt.tagParam) {
0900:                            Report.warning(lexer, element, node,
0901:                                    Report.DISCARDING_UNEXPECTED);
0902:                            continue;
0903:                        }
0904:
0905:                        if (node.tag == tt.tagBr && node.type == Node.EndTag)
0906:                            node.type = Node.StartTag;
0907:
0908:                        if (node.type == Node.EndTag) {
0909:                            /* coerce </br> to <br> */
0910:                            if (node.tag == tt.tagBr)
0911:                                node.type = Node.StartTag;
0912:                            else if (node.tag == tt.tagP) {
0913:                                /* coerce unmatched </p> to <br><br> */
0914:                                if (!element.isDescendantOf(tt.tagP)) {
0915:                                    Node.coerceNode(lexer, node, tt.tagBr);
0916:                                    Node.trimSpaces(lexer, element);
0917:                                    Node.insertNodeAtEnd(element, node);
0918:                                    node = lexer.inferredTag("br");
0919:                                    continue;
0920:                                }
0921:                            } else if ((node.tag.model & Dict.CM_INLINE) != 0
0922:                                    && node.tag != tt.tagA
0923:                                    && !((node.tag.model & Dict.CM_OBJECT) != 0)
0924:                                    && (element.tag.model & Dict.CM_INLINE) != 0) {
0925:                                /* allow any inline end tag to end current element */
0926:                                lexer.popInline(element);
0927:
0928:                                if (element.tag != tt.tagA) {
0929:                                    if (node.tag == tt.tagA
0930:                                            && node.tag != element.tag) {
0931:                                        Report.warning(lexer, element, node,
0932:                                                Report.MISSING_ENDTAG_BEFORE);
0933:                                        lexer.ungetToken();
0934:                                    } else {
0935:                                        Report.warning(lexer, element, node,
0936:                                                Report.NON_MATCHING_ENDTAG);
0937:                                    }
0938:
0939:                                    if (!((mode & Lexer.Preformatted) != 0))
0940:                                        Node.trimSpaces(lexer, element);
0941:                                    Node.trimEmptyElement(lexer, element);
0942:                                    return;
0943:                                }
0944:
0945:                                /* if parent is <a> then discard unexpected inline end tag */
0946:                                Report.warning(lexer, element, node,
0947:                                        Report.DISCARDING_UNEXPECTED);
0948:                                continue;
0949:                            } /* special case </tr> etc. for stuff moved in front of table */
0950:                            else if (lexer.exiled && node.tag.model != 0
0951:                                    && (node.tag.model & Dict.CM_TABLE) != 0) {
0952:                                lexer.ungetToken();
0953:                                Node.trimSpaces(lexer, element);
0954:                                Node.trimEmptyElement(lexer, element);
0955:                                return;
0956:                            }
0957:                        }
0958:
0959:                        /* allow any header tag to end current header */
0960:                        if ((node.tag.model & Dict.CM_HEADING) != 0
0961:                                && (element.tag.model & Dict.CM_HEADING) != 0) {
0962:                            if (node.tag == element.tag) {
0963:                                Report.warning(lexer, element, node,
0964:                                        Report.NON_MATCHING_ENDTAG);
0965:                            } else {
0966:                                Report.warning(lexer, element, node,
0967:                                        Report.MISSING_ENDTAG_BEFORE);
0968:                                lexer.ungetToken();
0969:                            }
0970:                            if (!((mode & Lexer.Preformatted) != 0))
0971:                                Node.trimSpaces(lexer, element);
0972:                            Node.trimEmptyElement(lexer, element);
0973:                            return;
0974:                        }
0975:
0976:                        /*
0977:                           an <A> tag to ends any open <A> element
0978:                           but <A href=...> is mapped to </A><A href=...>
0979:                         */
0980:                        if (node.tag == tt.tagA && !node.implicit
0981:                                && lexer.isPushed(node)) {
0982:                            /* coerce <a> to </a> unless it has some attributes */
0983:                            if (node.attributes == null) {
0984:                                node.type = Node.EndTag;
0985:                                Report.warning(lexer, element, node,
0986:                                        Report.COERCE_TO_ENDTAG);
0987:                                lexer.popInline(node);
0988:                                lexer.ungetToken();
0989:                                continue;
0990:                            }
0991:
0992:                            lexer.ungetToken();
0993:                            Report.warning(lexer, element, node,
0994:                                    Report.MISSING_ENDTAG_BEFORE);
0995:                            lexer.popInline(element);
0996:                            if (!((mode & Lexer.Preformatted) != 0))
0997:                                Node.trimSpaces(lexer, element);
0998:                            Node.trimEmptyElement(lexer, element);
0999:                            return;
1000:                        }
1001:
1002:                        if ((element.tag.model & Dict.CM_HEADING) != 0) {
1003:                            if (node.tag == tt.tagCenter
1004:                                    || node.tag == tt.tagDiv) {
1005:                                if (node.type != Node.StartTag
1006:                                        && node.type != Node.StartEndTag) {
1007:                                    Report.warning(lexer, element, node,
1008:                                            Report.DISCARDING_UNEXPECTED);
1009:                                    continue;
1010:                                }
1011:
1012:                                Report.warning(lexer, element, node,
1013:                                        Report.TAG_NOT_ALLOWED_IN);
1014:
1015:                                /* insert center as parent if heading is empty */
1016:                                if (element.content == null) {
1017:                                    Node.insertNodeAsParent(element, node);
1018:                                    continue;
1019:                                }
1020:
1021:                                /* split heading and make center parent of 2nd part */
1022:                                Node.insertNodeAfterElement(element, node);
1023:
1024:                                if (!((mode & Lexer.Preformatted) != 0))
1025:                                    Node.trimSpaces(lexer, element);
1026:
1027:                                element = lexer.cloneNode(element);
1028:                                element.start = lexer.lexsize;
1029:                                element.end = lexer.lexsize;
1030:                                Node.insertNodeAtEnd(node, element);
1031:                                continue;
1032:                            }
1033:
1034:                            if (node.tag == tt.tagHr) {
1035:                                if (node.type != Node.StartTag
1036:                                        && node.type != Node.StartEndTag) {
1037:                                    Report.warning(lexer, element, node,
1038:                                            Report.DISCARDING_UNEXPECTED);
1039:                                    continue;
1040:                                }
1041:
1042:                                Report.warning(lexer, element, node,
1043:                                        Report.TAG_NOT_ALLOWED_IN);
1044:
1045:                                /* insert hr before heading if heading is empty */
1046:                                if (element.content == null) {
1047:                                    Node.insertNodeBeforeElement(element, node);
1048:                                    continue;
1049:                                }
1050:
1051:                                /* split heading and insert hr before 2nd part */
1052:                                Node.insertNodeAfterElement(element, node);
1053:
1054:                                if (!((mode & Lexer.Preformatted) != 0))
1055:                                    Node.trimSpaces(lexer, element);
1056:
1057:                                element = lexer.cloneNode(element);
1058:                                element.start = lexer.lexsize;
1059:                                element.end = lexer.lexsize;
1060:                                Node.insertNodeAfterElement(node, element);
1061:                                continue;
1062:                            }
1063:                        }
1064:
1065:                        if (element.tag == tt.tagDt) {
1066:                            if (node.tag == tt.tagHr) {
1067:                                Node dd;
1068:
1069:                                if (node.type != Node.StartTag
1070:                                        && node.type != Node.StartEndTag) {
1071:                                    Report.warning(lexer, element, node,
1072:                                            Report.DISCARDING_UNEXPECTED);
1073:                                    continue;
1074:                                }
1075:
1076:                                Report.warning(lexer, element, node,
1077:                                        Report.TAG_NOT_ALLOWED_IN);
1078:                                dd = lexer.inferredTag("dd");
1079:
1080:                                /* insert hr within dd before dt if dt is empty */
1081:                                if (element.content == null) {
1082:                                    Node.insertNodeBeforeElement(element, dd);
1083:                                    Node.insertNodeAtEnd(dd, node);
1084:                                    continue;
1085:                                }
1086:
1087:                                /* split dt and insert hr within dd before 2nd part */
1088:                                Node.insertNodeAfterElement(element, dd);
1089:                                Node.insertNodeAtEnd(dd, node);
1090:
1091:                                if (!((mode & Lexer.Preformatted) != 0))
1092:                                    Node.trimSpaces(lexer, element);
1093:
1094:                                element = lexer.cloneNode(element);
1095:                                element.start = lexer.lexsize;
1096:                                element.end = lexer.lexsize;
1097:                                Node.insertNodeAfterElement(dd, element);
1098:                                continue;
1099:                            }
1100:                        }
1101:
1102:                        /* 
1103:                          if this is the end tag for an ancestor element
1104:                          then infer end tag for this element
1105:                         */
1106:                        if (node.type == Node.EndTag) {
1107:                            for (parent = element.parent; parent != null; parent = parent.parent) {
1108:                                if (node.tag == parent.tag) {
1109:                                    if (!((element.tag.model & Dict.CM_OPT) != 0)
1110:                                            && !element.implicit)
1111:                                        Report.warning(lexer, element, node,
1112:                                                Report.MISSING_ENDTAG_BEFORE);
1113:
1114:                                    if (element.tag == tt.tagA)
1115:                                        lexer.popInline(element);
1116:
1117:                                    lexer.ungetToken();
1118:
1119:                                    if (!((mode & Lexer.Preformatted) != 0))
1120:                                        Node.trimSpaces(lexer, element);
1121:
1122:                                    Node.trimEmptyElement(lexer, element);
1123:                                    return;
1124:                                }
1125:                            }
1126:                        }
1127:
1128:                        /* block level tags end this element */
1129:                        if (!((node.tag.model & Dict.CM_INLINE) != 0)) {
1130:                            if (node.type != Node.StartTag) {
1131:                                Report.warning(lexer, element, node,
1132:                                        Report.DISCARDING_UNEXPECTED);
1133:                                continue;
1134:                            }
1135:
1136:                            if (!((element.tag.model & Dict.CM_OPT) != 0))
1137:                                Report.warning(lexer, element, node,
1138:                                        Report.MISSING_ENDTAG_BEFORE);
1139:
1140:                            if ((node.tag.model & Dict.CM_HEAD) != 0
1141:                                    && !((node.tag.model & Dict.CM_BLOCK) != 0)) {
1142:                                moveToHead(lexer, element, node);
1143:                                continue;
1144:                            }
1145:
1146:                            /*
1147:                               prevent anchors from propagating into block tags
1148:                               except for headings h1 to h6
1149:                             */
1150:                            if (element.tag == tt.tagA) {
1151:                                if (node.tag != null
1152:                                        && !((node.tag.model & Dict.CM_HEADING) != 0))
1153:                                    lexer.popInline(element);
1154:                                else if (!(element.content != null)) {
1155:                                    Node.discardElement(element);
1156:                                    lexer.ungetToken();
1157:                                    return;
1158:                                }
1159:                            }
1160:
1161:                            lexer.ungetToken();
1162:
1163:                            if (!((mode & Lexer.Preformatted) != 0))
1164:                                Node.trimSpaces(lexer, element);
1165:
1166:                            Node.trimEmptyElement(lexer, element);
1167:                            return;
1168:                        }
1169:
1170:                        /* parse inline element */
1171:                        if (node.type == Node.StartTag
1172:                                || node.type == Node.StartEndTag) {
1173:                            if (node.implicit)
1174:                                Report.warning(lexer, element, node,
1175:                                        Report.INSERTING_TAG);
1176:
1177:                            /* trim white space before <br> */
1178:                            if (node.tag == tt.tagBr)
1179:                                Node.trimSpaces(lexer, element);
1180:
1181:                            Node.insertNodeAtEnd(element, node);
1182:                            parseTag(lexer, node, mode);
1183:                            continue;
1184:                        }
1185:
1186:                        /* discard unexpected tags */
1187:                        Report.warning(lexer, element, node,
1188:                                Report.DISCARDING_UNEXPECTED);
1189:                    }
1190:
1191:                    if (!((element.tag.model & Dict.CM_OPT) != 0))
1192:                        Report.warning(lexer, element, node,
1193:                                Report.MISSING_ENDTAG_FOR);
1194:
1195:                    Node.trimEmptyElement(lexer, element);
1196:                }
1197:            };
1198:
1199:            public static class ParseList implements  Parser {
1200:
1201:                public void parse(Lexer lexer, Node list, short mode) {
1202:                    Node node;
1203:                    Node parent;
1204:                    TagTable tt = lexer.configuration.tt;
1205:
1206:                    if ((list.tag.model & Dict.CM_EMPTY) != 0)
1207:                        return;
1208:
1209:                    lexer.insert = -1; /* defer implicit inline start tags */
1210:
1211:                    while (true) {
1212:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
1213:                        if (node == null)
1214:                            break;
1215:
1216:                        if (node.tag == list.tag && node.type == Node.EndTag) {
1217:                            if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1218:                                Node.coerceNode(lexer, list, tt.tagUl);
1219:
1220:                            list.closed = true;
1221:                            Node.trimEmptyElement(lexer, list);
1222:                            return;
1223:                        }
1224:
1225:                        /* deal with comments etc. */
1226:                        if (Node.insertMisc(list, node))
1227:                            continue;
1228:
1229:                        if (node.type != Node.TextNode && node.tag == null) {
1230:                            Report.warning(lexer, list, node,
1231:                                    Report.DISCARDING_UNEXPECTED);
1232:                            continue;
1233:                        }
1234:
1235:                        /* 
1236:                          if this is the end tag for an ancestor element
1237:                          then infer end tag for this element
1238:                         */
1239:                        if (node.type == Node.EndTag) {
1240:                            if (node.tag == tt.tagForm) {
1241:                                lexer.badForm = 1;
1242:                                Report.warning(lexer, list, node,
1243:                                        Report.DISCARDING_UNEXPECTED);
1244:                                continue;
1245:                            }
1246:
1247:                            if (node.tag != null
1248:                                    && (node.tag.model & Dict.CM_INLINE) != 0) {
1249:                                Report.warning(lexer, list, node,
1250:                                        Report.DISCARDING_UNEXPECTED);
1251:                                lexer.popInline(node);
1252:                                continue;
1253:                            }
1254:
1255:                            for (parent = list.parent; parent != null; parent = parent.parent) {
1256:                                if (node.tag == parent.tag) {
1257:                                    Report.warning(lexer, list, node,
1258:                                            Report.MISSING_ENDTAG_BEFORE);
1259:                                    lexer.ungetToken();
1260:
1261:                                    if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1262:                                        Node.coerceNode(lexer, list, tt.tagUl);
1263:
1264:                                    Node.trimEmptyElement(lexer, list);
1265:                                    return;
1266:                                }
1267:                            }
1268:
1269:                            Report.warning(lexer, list, node,
1270:                                    Report.DISCARDING_UNEXPECTED);
1271:                            continue;
1272:                        }
1273:
1274:                        if (node.tag != tt.tagLi) {
1275:                            lexer.ungetToken();
1276:
1277:                            if (node.tag != null
1278:                                    && (node.tag.model & Dict.CM_BLOCK) != 0
1279:                                    && lexer.excludeBlocks) {
1280:                                Report.warning(lexer, list, node,
1281:                                        Report.MISSING_ENDTAG_BEFORE);
1282:                                Node.trimEmptyElement(lexer, list);
1283:                                return;
1284:                            }
1285:
1286:                            node = lexer.inferredTag("li");
1287:                            node.addAttribute("style", "list-style: none");
1288:                            Report.warning(lexer, list, node,
1289:                                    Report.MISSING_STARTTAG);
1290:                        }
1291:
1292:                        /* node should be <LI> */
1293:                        Node.insertNodeAtEnd(list, node);
1294:                        parseTag(lexer, node, Lexer.IgnoreWhitespace);
1295:                    }
1296:
1297:                    if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1298:                        Node.coerceNode(lexer, list, tt.tagUl);
1299:
1300:                    Report
1301:                            .warning(lexer, list, node,
1302:                                    Report.MISSING_ENDTAG_FOR);
1303:                    Node.trimEmptyElement(lexer, list);
1304:                }
1305:
1306:            };
1307:
1308:            public static class ParseDefList implements  Parser {
1309:
1310:                public void parse(Lexer lexer, Node list, short mode) {
1311:                    Node node, parent;
1312:                    TagTable tt = lexer.configuration.tt;
1313:
1314:                    if ((list.tag.model & Dict.CM_EMPTY) != 0)
1315:                        return;
1316:
1317:                    lexer.insert = -1; /* defer implicit inline start tags */
1318:
1319:                    while (true) {
1320:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
1321:                        if (node == null)
1322:                            break;
1323:                        if (node.tag == list.tag && node.type == Node.EndTag) {
1324:                            list.closed = true;
1325:                            Node.trimEmptyElement(lexer, list);
1326:                            return;
1327:                        }
1328:
1329:                        /* deal with comments etc. */
1330:                        if (Node.insertMisc(list, node))
1331:                            continue;
1332:
1333:                        if (node.type == Node.TextNode) {
1334:                            lexer.ungetToken();
1335:                            node = lexer.inferredTag("dt");
1336:                            Report.warning(lexer, list, node,
1337:                                    Report.MISSING_STARTTAG);
1338:                        }
1339:
1340:                        if (node.tag == null) {
1341:                            Report.warning(lexer, list, node,
1342:                                    Report.DISCARDING_UNEXPECTED);
1343:                            continue;
1344:                        }
1345:
1346:                        /* 
1347:                          if this is the end tag for an ancestor element
1348:                          then infer end tag for this element
1349:                         */
1350:                        if (node.type == Node.EndTag) {
1351:                            if (node.tag == tt.tagForm) {
1352:                                lexer.badForm = 1;
1353:                                Report.warning(lexer, list, node,
1354:                                        Report.DISCARDING_UNEXPECTED);
1355:                                continue;
1356:                            }
1357:
1358:                            for (parent = list.parent; parent != null; parent = parent.parent) {
1359:                                if (node.tag == parent.tag) {
1360:                                    Report.warning(lexer, list, node,
1361:                                            Report.MISSING_ENDTAG_BEFORE);
1362:
1363:                                    lexer.ungetToken();
1364:                                    Node.trimEmptyElement(lexer, list);
1365:                                    return;
1366:                                }
1367:                            }
1368:                        }
1369:
1370:                        /* center in a dt or a dl breaks the dl list in two */
1371:                        if (node.tag == tt.tagCenter) {
1372:                            if (list.content != null)
1373:                                Node.insertNodeAfterElement(list, node);
1374:                            else /* trim empty dl list */
1375:                            {
1376:                                Node.insertNodeBeforeElement(list, node);
1377:                                Node.discardElement(list);
1378:                            }
1379:
1380:                            /* and parse contents of center */
1381:                            parseTag(lexer, node, mode);
1382:
1383:                            /* now create a new dl element */
1384:                            list = lexer.inferredTag("dl");
1385:                            Node.insertNodeAfterElement(node, list);
1386:                            continue;
1387:                        }
1388:
1389:                        if (!(node.tag == tt.tagDt || node.tag == tt.tagDd)) {
1390:                            lexer.ungetToken();
1391:
1392:                            if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)) {
1393:                                Report.warning(lexer, list, node,
1394:                                        Report.TAG_NOT_ALLOWED_IN);
1395:                                Node.trimEmptyElement(lexer, list);
1396:                                return;
1397:                            }
1398:
1399:                            /* if DD appeared directly in BODY then exclude blocks */
1400:                            if (!((node.tag.model & Dict.CM_INLINE) != 0)
1401:                                    && lexer.excludeBlocks) {
1402:                                Node.trimEmptyElement(lexer, list);
1403:                                return;
1404:                            }
1405:
1406:                            node = lexer.inferredTag("dd");
1407:                            Report.warning(lexer, list, node,
1408:                                    Report.MISSING_STARTTAG);
1409:                        }
1410:
1411:                        if (node.type == Node.EndTag) {
1412:                            Report.warning(lexer, list, node,
1413:                                    Report.DISCARDING_UNEXPECTED);
1414:                            continue;
1415:                        }
1416:
1417:                        /* node should be <DT> or <DD>*/
1418:                        Node.insertNodeAtEnd(list, node);
1419:                        parseTag(lexer, node, Lexer.IgnoreWhitespace);
1420:                    }
1421:
1422:                    Report
1423:                            .warning(lexer, list, node,
1424:                                    Report.MISSING_ENDTAG_FOR);
1425:                    Node.trimEmptyElement(lexer, list);
1426:                }
1427:
1428:            };
1429:
1430:            public static class ParsePre implements  Parser {
1431:
1432:                public void parse(Lexer lexer, Node pre, short mode) {
1433:                    Node node, parent;
1434:                    TagTable tt = lexer.configuration.tt;
1435:
1436:                    if ((pre.tag.model & Dict.CM_EMPTY) != 0)
1437:                        return;
1438:
1439:                    if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
1440:                        Node.coerceNode(lexer, pre, tt.tagPre);
1441:
1442:                    lexer.inlineDup(null); /* tell lexer to insert inlines if needed */
1443:
1444:                    while (true) {
1445:                        node = lexer.getToken(Lexer.Preformatted);
1446:                        if (node == null)
1447:                            break;
1448:                        if (node.tag == pre.tag && node.type == Node.EndTag) {
1449:                            Node.trimSpaces(lexer, pre);
1450:                            pre.closed = true;
1451:                            Node.trimEmptyElement(lexer, pre);
1452:                            return;
1453:                        }
1454:
1455:                        if (node.tag == tt.tagHtml) {
1456:                            if (node.type == Node.StartTag
1457:                                    || node.type == Node.StartEndTag)
1458:                                Report.warning(lexer, pre, node,
1459:                                        Report.DISCARDING_UNEXPECTED);
1460:
1461:                            continue;
1462:                        }
1463:
1464:                        if (node.type == Node.TextNode) {
1465:                            /* if first check for inital newline */
1466:                            if (pre.content == null) {
1467:                                if (node.textarray[node.start] == (byte) '\n')
1468:                                    ++node.start;
1469:
1470:                                if (node.start >= node.end) {
1471:                                    continue;
1472:                                }
1473:                            }
1474:
1475:                            Node.insertNodeAtEnd(pre, node);
1476:                            continue;
1477:                        }
1478:
1479:                        /* deal with comments etc. */
1480:                        if (Node.insertMisc(pre, node))
1481:                            continue;
1482:
1483:                        /* discard unknown  and PARAM tags */
1484:                        if (node.tag == null || node.tag == tt.tagParam) {
1485:                            Report.warning(lexer, pre, node,
1486:                                    Report.DISCARDING_UNEXPECTED);
1487:                            continue;
1488:                        }
1489:
1490:                        if (node.tag == tt.tagP) {
1491:                            if (node.type == Node.StartTag) {
1492:                                Report.warning(lexer, pre, node,
1493:                                        Report.USING_BR_INPLACE_OF);
1494:
1495:                                /* trim white space before <p> in <pre>*/
1496:                                Node.trimSpaces(lexer, pre);
1497:
1498:                                /* coerce both <p> and </p> to <br> */
1499:                                Node.coerceNode(lexer, node, tt.tagBr);
1500:                                Node.insertNodeAtEnd(pre, node);
1501:                            } else {
1502:                                Report.warning(lexer, pre, node,
1503:                                        Report.DISCARDING_UNEXPECTED);
1504:                            }
1505:                            continue;
1506:                        }
1507:
1508:                        if ((node.tag.model & Dict.CM_HEAD) != 0
1509:                                && !((node.tag.model & Dict.CM_BLOCK) != 0)) {
1510:                            moveToHead(lexer, pre, node);
1511:                            continue;
1512:                        }
1513:
1514:                        /* 
1515:                          if this is the end tag for an ancestor element
1516:                          then infer end tag for this element
1517:                         */
1518:                        if (node.type == Node.EndTag) {
1519:                            if (node.tag == tt.tagForm) {
1520:                                lexer.badForm = 1;
1521:                                Report.warning(lexer, pre, node,
1522:                                        Report.DISCARDING_UNEXPECTED);
1523:                                continue;
1524:                            }
1525:
1526:                            for (parent = pre.parent; parent != null; parent = parent.parent) {
1527:                                if (node.tag == parent.tag) {
1528:                                    Report.warning(lexer, pre, node,
1529:                                            Report.MISSING_ENDTAG_BEFORE);
1530:
1531:                                    lexer.ungetToken();
1532:                                    Node.trimSpaces(lexer, pre);
1533:                                    Node.trimEmptyElement(lexer, pre);
1534:                                    return;
1535:                                }
1536:                            }
1537:                        }
1538:
1539:                        /* what about head content, HEAD, BODY tags etc? */
1540:                        if (!((node.tag.model & Dict.CM_INLINE) != 0)) {
1541:                            if (node.type != Node.StartTag) {
1542:                                Report.warning(lexer, pre, node,
1543:                                        Report.DISCARDING_UNEXPECTED);
1544:                                continue;
1545:                            }
1546:
1547:                            Report.warning(lexer, pre, node,
1548:                                    Report.MISSING_ENDTAG_BEFORE);
1549:                            lexer.excludeBlocks = true;
1550:
1551:                            /* check if we need to infer a container */
1552:                            if ((node.tag.model & Dict.CM_LIST) != 0) {
1553:                                lexer.ungetToken();
1554:                                node = lexer.inferredTag("ul");
1555:                                Node.addClass(node, "noindent");
1556:                            } else if ((node.tag.model & Dict.CM_DEFLIST) != 0) {
1557:                                lexer.ungetToken();
1558:                                node = lexer.inferredTag("dl");
1559:                            } else if ((node.tag.model & Dict.CM_TABLE) != 0) {
1560:                                lexer.ungetToken();
1561:                                node = lexer.inferredTag("table");
1562:                            }
1563:
1564:                            Node.insertNodeAfterElement(pre, node);
1565:                            pre = lexer.inferredTag("pre");
1566:                            Node.insertNodeAfterElement(node, pre);
1567:                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
1568:                            lexer.excludeBlocks = false;
1569:                            continue;
1570:                        }
1571:                        /*
1572:                        if (!((node.tag.model & Dict.CM_INLINE) != 0))
1573:                        {
1574:                            Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
1575:                            lexer.ungetToken();
1576:                            return;
1577:                        }
1578:                         */
1579:                        if (node.type == Node.StartTag
1580:                                || node.type == Node.StartEndTag) {
1581:                            /* trim white space before <br> */
1582:                            if (node.tag == tt.tagBr)
1583:                                Node.trimSpaces(lexer, pre);
1584:
1585:                            Node.insertNodeAtEnd(pre, node);
1586:                            parseTag(lexer, node, Lexer.Preformatted);
1587:                            continue;
1588:                        }
1589:
1590:                        /* discard unexpected tags */
1591:                        Report.warning(lexer, pre, node,
1592:                                Report.DISCARDING_UNEXPECTED);
1593:                    }
1594:
1595:                    Report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
1596:                    Node.trimEmptyElement(lexer, pre);
1597:                }
1598:
1599:            };
1600:
1601:            public static class ParseBlock implements  Parser {
1602:
1603:                public void parse(Lexer lexer, Node element, short mode)
1604:                /*
1605:                   element is node created by the lexer
1606:                   upon seeing the start tag, or by the
1607:                   parser when the start tag is inferred
1608:                 */
1609:                {
1610:                    Node node, parent;
1611:                    boolean checkstack;
1612:                    int istackbase = 0;
1613:                    TagTable tt = lexer.configuration.tt;
1614:
1615:                    checkstack = true;
1616:
1617:                    if ((element.tag.model & Dict.CM_EMPTY) != 0)
1618:                        return;
1619:
1620:                    if (element.tag == tt.tagForm
1621:                            && element.isDescendantOf(tt.tagForm))
1622:                        Report.warning(lexer, element, null,
1623:                                Report.ILLEGAL_NESTING);
1624:
1625:                    /*
1626:                     InlineDup() asks the lexer to insert inline emphasis tags
1627:                     currently pushed on the istack, but take care to avoid
1628:                     propagating inline emphasis inside OBJECT or APPLET.
1629:                     For these elements a fresh inline stack context is created
1630:                     and disposed of upon reaching the end of the element.
1631:                     They thus behave like table cells in this respect.
1632:                     */
1633:                    if ((element.tag.model & Dict.CM_OBJECT) != 0) {
1634:                        istackbase = lexer.istackbase;
1635:                        lexer.istackbase = lexer.istack.size();
1636:                    }
1637:
1638:                    if (!((element.tag.model & Dict.CM_MIXED) != 0))
1639:                        lexer.inlineDup(null);
1640:
1641:                    mode = Lexer.IgnoreWhitespace;
1642:
1643:                    while (true) {
1644:                        node = lexer.getToken(mode /*Lexer.MixedContent*/);
1645:                        if (node == null)
1646:                            break;
1647:                        /* end tag for this element */
1648:                        if (node.type == Node.EndTag
1649:                                && node.tag != null
1650:                                && (node.tag == element.tag || element.was == node.tag)) {
1651:
1652:                            if ((element.tag.model & Dict.CM_OBJECT) != 0) {
1653:                                /* pop inline stack */
1654:                                while (lexer.istack.size() > lexer.istackbase)
1655:                                    lexer.popInline(null);
1656:                                lexer.istackbase = istackbase;
1657:                            }
1658:
1659:                            element.closed = true;
1660:                            Node.trimSpaces(lexer, element);
1661:                            Node.trimEmptyElement(lexer, element);
1662:                            return;
1663:                        }
1664:
1665:                        if (node.tag == tt.tagHtml || node.tag == tt.tagHead
1666:                                || node.tag == tt.tagBody) {
1667:                            if (node.type == Node.StartTag
1668:                                    || node.type == Node.StartEndTag)
1669:                                Report.warning(lexer, element, node,
1670:                                        Report.DISCARDING_UNEXPECTED);
1671:
1672:                            continue;
1673:                        }
1674:
1675:                        if (node.type == Node.EndTag) {
1676:                            if (node.tag == null) {
1677:                                Report.warning(lexer, element, node,
1678:                                        Report.DISCARDING_UNEXPECTED);
1679:
1680:                                continue;
1681:                            } else if (node.tag == tt.tagBr)
1682:                                node.type = Node.StartTag;
1683:                            else if (node.tag == tt.tagP) {
1684:                                Node.coerceNode(lexer, node, tt.tagBr);
1685:                                Node.insertNodeAtEnd(element, node);
1686:                                node = lexer.inferredTag("br");
1687:                            } else {
1688:                                /* 
1689:                                  if this is the end tag for an ancestor element
1690:                                  then infer end tag for this element
1691:                                 */
1692:                                for (parent = element.parent; parent != null; parent = parent.parent) {
1693:                                    if (node.tag == parent.tag) {
1694:                                        if (!((element.tag.model & Dict.CM_OPT) != 0))
1695:                                            Report
1696:                                                    .warning(
1697:                                                            lexer,
1698:                                                            element,
1699:                                                            node,
1700:                                                            Report.MISSING_ENDTAG_BEFORE);
1701:
1702:                                        lexer.ungetToken();
1703:
1704:                                        if ((element.tag.model & Dict.CM_OBJECT) != 0) {
1705:                                            /* pop inline stack */
1706:                                            while (lexer.istack.size() > lexer.istackbase)
1707:                                                lexer.popInline(null);
1708:                                            lexer.istackbase = istackbase;
1709:                                        }
1710:
1711:                                        Node.trimSpaces(lexer, element);
1712:                                        Node.trimEmptyElement(lexer, element);
1713:                                        return;
1714:                                    }
1715:                                }
1716:                                /* special case </tr> etc. for stuff moved in front of table */
1717:                                if (lexer.exiled
1718:                                        && node.tag.model != 0
1719:                                        && (node.tag.model & Dict.CM_TABLE) != 0) {
1720:                                    lexer.ungetToken();
1721:                                    Node.trimSpaces(lexer, element);
1722:                                    Node.trimEmptyElement(lexer, element);
1723:                                    return;
1724:                                }
1725:                            }
1726:                        }
1727:
1728:                        /* mixed content model permits text */
1729:                        if (node.type == Node.TextNode) {
1730:                            boolean iswhitenode = false;
1731:
1732:                            if (node.type == Node.TextNode
1733:                                    && node.end <= node.start + 1
1734:                                    && lexer.lexbuf[node.start] == (byte) ' ')
1735:                                iswhitenode = true;
1736:
1737:                            if (lexer.configuration.EncloseBlockText
1738:                                    && !iswhitenode) {
1739:                                lexer.ungetToken();
1740:                                node = lexer.inferredTag("p");
1741:                                Node.insertNodeAtEnd(element, node);
1742:                                parseTag(lexer, node, Lexer.MixedContent);
1743:                                continue;
1744:                            }
1745:
1746:                            if (checkstack) {
1747:                                checkstack = false;
1748:
1749:                                if (!((element.tag.model & Dict.CM_MIXED) != 0)) {
1750:                                    if (lexer.inlineDup(node) > 0)
1751:                                        continue;
1752:                                }
1753:                            }
1754:
1755:                            Node.insertNodeAtEnd(element, node);
1756:                            mode = Lexer.MixedContent;
1757:                            /*
1758:                              HTML4 strict doesn't allow mixed content for
1759:                              elements with %block; as their content model
1760:                             */
1761:                            lexer.versions &= ~Dict.VERS_HTML40_STRICT;
1762:                            continue;
1763:                        }
1764:
1765:                        if (Node.insertMisc(element, node))
1766:                            continue;
1767:
1768:                        /* allow PARAM elements? */
1769:                        if (node.tag == tt.tagParam) {
1770:                            if (((element.tag.model & Dict.CM_PARAM) != 0)
1771:                                    && (node.type == Node.StartTag || node.type == Node.StartEndTag)) {
1772:                                Node.insertNodeAtEnd(element, node);
1773:                                continue;
1774:                            }
1775:
1776:                            /* otherwise discard it */
1777:                            Report.warning(lexer, element, node,
1778:                                    Report.DISCARDING_UNEXPECTED);
1779:                            continue;
1780:                        }
1781:
1782:                        /* allow AREA elements? */
1783:                        if (node.tag == tt.tagArea) {
1784:                            if ((element.tag == tt.tagMap)
1785:                                    && (node.type == Node.StartTag || node.type == Node.StartEndTag)) {
1786:                                Node.insertNodeAtEnd(element, node);
1787:                                continue;
1788:                            }
1789:
1790:                            /* otherwise discard it */
1791:                            Report.warning(lexer, element, node,
1792:                                    Report.DISCARDING_UNEXPECTED);
1793:                            continue;
1794:                        }
1795:
1796:                        /* ignore unknown start/end tags */
1797:                        if (node.tag == null) {
1798:                            Report.warning(lexer, element, node,
1799:                                    Report.DISCARDING_UNEXPECTED);
1800:                            continue;
1801:                        }
1802:
1803:                        /*
1804:                          Allow Dict.CM_INLINE elements here.
1805:
1806:                          Allow Dict.CM_BLOCK elements here unless
1807:                          lexer.excludeBlocks is yes.
1808:
1809:                          LI and DD are special cased.
1810:
1811:                          Otherwise infer end tag for this element.
1812:                         */
1813:
1814:                        if (!((node.tag.model & Dict.CM_INLINE) != 0)) {
1815:                            if (node.type != Node.StartTag
1816:                                    && node.type != Node.StartEndTag) {
1817:                                Report.warning(lexer, element, node,
1818:                                        Report.DISCARDING_UNEXPECTED);
1819:                                continue;
1820:                            }
1821:
1822:                            if (element.tag == tt.tagTd
1823:                                    || element.tag == tt.tagTh) {
1824:                                /* if parent is a table cell, avoid inferring the end of the cell */
1825:
1826:                                if ((node.tag.model & Dict.CM_HEAD) != 0) {
1827:                                    moveToHead(lexer, element, node);
1828:                                    continue;
1829:                                }
1830:
1831:                                if ((node.tag.model & Dict.CM_LIST) != 0) {
1832:                                    lexer.ungetToken();
1833:                                    node = lexer.inferredTag("ul");
1834:                                    Node.addClass(node, "noindent");
1835:                                    lexer.excludeBlocks = true;
1836:                                } else if ((node.tag.model & Dict.CM_DEFLIST) != 0) {
1837:                                    lexer.ungetToken();
1838:                                    node = lexer.inferredTag("dl");
1839:                                    lexer.excludeBlocks = true;
1840:                                }
1841:
1842:                                /* infer end of current table cell */
1843:                                if (!((node.tag.model & Dict.CM_BLOCK) != 0)) {
1844:                                    lexer.ungetToken();
1845:                                    Node.trimSpaces(lexer, element);
1846:                                    Node.trimEmptyElement(lexer, element);
1847:                                    return;
1848:                                }
1849:                            } else if ((node.tag.model & Dict.CM_BLOCK) != 0) {
1850:                                if (lexer.excludeBlocks) {
1851:                                    if (!((element.tag.model & Dict.CM_OPT) != 0))
1852:                                        Report.warning(lexer, element, node,
1853:                                                Report.MISSING_ENDTAG_BEFORE);
1854:
1855:                                    lexer.ungetToken();
1856:
1857:                                    if ((element.tag.model & Dict.CM_OBJECT) != 0)
1858:                                        lexer.istackbase = istackbase;
1859:
1860:                                    Node.trimSpaces(lexer, element);
1861:                                    Node.trimEmptyElement(lexer, element);
1862:                                    return;
1863:                                }
1864:                            } else /* things like list items */
1865:                            {
1866:                                if (!((element.tag.model & Dict.CM_OPT) != 0)
1867:                                        && !element.implicit)
1868:                                    Report.warning(lexer, element, node,
1869:                                            Report.MISSING_ENDTAG_BEFORE);
1870:
1871:                                if ((node.tag.model & Dict.CM_HEAD) != 0) {
1872:                                    moveToHead(lexer, element, node);
1873:                                    continue;
1874:                                }
1875:
1876:                                lexer.ungetToken();
1877:
1878:                                if ((node.tag.model & Dict.CM_LIST) != 0) {
1879:                                    if (element.parent != null
1880:                                            && element.parent.tag != null
1881:                                            && element.parent.tag.parser == getParseList()) {
1882:                                        Node.trimSpaces(lexer, element);
1883:                                        Node.trimEmptyElement(lexer, element);
1884:                                        return;
1885:                                    }
1886:
1887:                                    node = lexer.inferredTag("ul");
1888:                                    Node.addClass(node, "noindent");
1889:                                } else if ((node.tag.model & Dict.CM_DEFLIST) != 0) {
1890:                                    if (element.parent.tag == tt.tagDl) {
1891:                                        Node.trimSpaces(lexer, element);
1892:                                        Node.trimEmptyElement(lexer, element);
1893:                                        return;
1894:                                    }
1895:
1896:                                    node = lexer.inferredTag("dl");
1897:                                } else if ((node.tag.model & Dict.CM_TABLE) != 0
1898:                                        || (node.tag.model & Dict.CM_ROW) != 0) {
1899:                                    node = lexer.inferredTag("table");
1900:                                } else if ((element.tag.model & Dict.CM_OBJECT) != 0) {
1901:                                    /* pop inline stack */
1902:                                    while (lexer.istack.size() > lexer.istackbase)
1903:                                        lexer.popInline(null);
1904:                                    lexer.istackbase = istackbase;
1905:                                    Node.trimSpaces(lexer, element);
1906:                                    Node.trimEmptyElement(lexer, element);
1907:                                    return;
1908:
1909:                                } else {
1910:                                    Node.trimSpaces(lexer, element);
1911:                                    Node.trimEmptyElement(lexer, element);
1912:                                    return;
1913:                                }
1914:                            }
1915:                        }
1916:
1917:                        /* parse known element */
1918:                        if (node.type == Node.StartTag
1919:                                || node.type == Node.StartEndTag) {
1920:                            if ((node.tag.model & Dict.CM_INLINE) != 0) {
1921:                                if (checkstack && !node.implicit) {
1922:                                    checkstack = false;
1923:
1924:                                    if (lexer.inlineDup(node) > 0)
1925:                                        continue;
1926:                                }
1927:
1928:                                mode = Lexer.MixedContent;
1929:                            } else {
1930:                                checkstack = true;
1931:                                mode = Lexer.IgnoreWhitespace;
1932:                            }
1933:
1934:                            /* trim white space before <br> */
1935:                            if (node.tag == tt.tagBr)
1936:                                Node.trimSpaces(lexer, element);
1937:
1938:                            Node.insertNodeAtEnd(element, node);
1939:
1940:                            if (node.implicit)
1941:                                Report.warning(lexer, element, node,
1942:                                        Report.INSERTING_TAG);
1943:
1944:                            parseTag(lexer, node, Lexer.IgnoreWhitespace /*Lexer.MixedContent*/);
1945:                            continue;
1946:                        }
1947:
1948:                        /* discard unexpected tags */
1949:                        if (node.type == Node.EndTag)
1950:                            lexer.popInline(node); /* if inline end tag */
1951:
1952:                        Report.warning(lexer, element, node,
1953:                                Report.DISCARDING_UNEXPECTED);
1954:                    }
1955:
1956:                    if (!((element.tag.model & Dict.CM_OPT) != 0))
1957:                        Report.warning(lexer, element, node,
1958:                                Report.MISSING_ENDTAG_FOR);
1959:
1960:                    if ((element.tag.model & Dict.CM_OBJECT) != 0) {
1961:                        /* pop inline stack */
1962:                        while (lexer.istack.size() > lexer.istackbase)
1963:                            lexer.popInline(null);
1964:                        lexer.istackbase = istackbase;
1965:                    }
1966:
1967:                    Node.trimSpaces(lexer, element);
1968:                    Node.trimEmptyElement(lexer, element);
1969:                }
1970:
1971:            };
1972:
1973:            public static class ParseTableTag implements  Parser {
1974:
1975:                public void parse(Lexer lexer, Node table, short mode) {
1976:                    Node node, parent;
1977:                    int istackbase;
1978:                    TagTable tt = lexer.configuration.tt;
1979:
1980:                    lexer.deferDup();
1981:                    istackbase = lexer.istackbase;
1982:                    lexer.istackbase = lexer.istack.size();
1983:
1984:                    while (true) {
1985:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
1986:                        if (node == null)
1987:                            break;
1988:                        if (node.tag == table.tag && node.type == Node.EndTag) {
1989:                            lexer.istackbase = istackbase;
1990:                            table.closed = true;
1991:                            Node.trimEmptyElement(lexer, table);
1992:                            return;
1993:                        }
1994:
1995:                        /* deal with comments etc. */
1996:                        if (Node.insertMisc(table, node))
1997:                            continue;
1998:
1999:                        /* discard unknown tags */
2000:                        if (node.tag == null && node.type != Node.TextNode) {
2001:                            Report.warning(lexer, table, node,
2002:                                    Report.DISCARDING_UNEXPECTED);
2003:                            continue;
2004:                        }
2005:
2006:                        /* if TD or TH or text or inline or block then infer <TR> */
2007:
2008:                        if (node.type != Node.EndTag) {
2009:                            if (node.tag == tt.tagTd || node.tag == tt.tagTh
2010:                                    || node.tag == tt.tagTable) {
2011:                                lexer.ungetToken();
2012:                                node = lexer.inferredTag("tr");
2013:                                Report.warning(lexer, table, node,
2014:                                        Report.MISSING_STARTTAG);
2015:                            } else if (node.type == Node.TextNode
2016:                                    || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) {
2017:                                Node.insertNodeBeforeElement(table, node);
2018:                                Report.warning(lexer, table, node,
2019:                                        Report.TAG_NOT_ALLOWED_IN);
2020:                                lexer.exiled = true;
2021:
2022:                                /* AQ: TODO
2023:                                   Line 2040 of parser.c (13 Jan 2000) reads as follows:
2024:                                   if (!node->type == TextNode)
2025:                                   This will always evaluate to false.
2026:                                   This has been reported to Dave Raggett <dsr@w3.org>
2027:                                 */
2028:                                //Should be?: if (!(node.type == Node.TextNode))
2029:                                if (false)
2030:                                    parseTag(lexer, node,
2031:                                            Lexer.IgnoreWhitespace);
2032:
2033:                                lexer.exiled = false;
2034:                                continue;
2035:                            } else if ((node.tag.model & Dict.CM_HEAD) != 0) {
2036:                                moveToHead(lexer, table, node);
2037:                                continue;
2038:                            }
2039:                        }
2040:
2041:                        /* 
2042:                          if this is the end tag for an ancestor element
2043:                          then infer end tag for this element
2044:                         */
2045:                        if (node.type == Node.EndTag) {
2046:                            if (node.tag == tt.tagForm) {
2047:                                lexer.badForm = 1;
2048:                                Report.warning(lexer, table, node,
2049:                                        Report.DISCARDING_UNEXPECTED);
2050:                                continue;
2051:                            }
2052:
2053:                            if (node.tag != null
2054:                                    && (node.tag.model & (Dict.CM_TABLE | Dict.CM_ROW)) != 0) {
2055:                                Report.warning(lexer, table, node,
2056:                                        Report.DISCARDING_UNEXPECTED);
2057:                                continue;
2058:                            }
2059:
2060:                            for (parent = table.parent; parent != null; parent = parent.parent) {
2061:                                if (node.tag == parent.tag) {
2062:                                    Report.warning(lexer, table, node,
2063:                                            Report.MISSING_ENDTAG_BEFORE);
2064:                                    lexer.ungetToken();
2065:                                    lexer.istackbase = istackbase;
2066:                                    Node.trimEmptyElement(lexer, table);
2067:                                    return;
2068:                                }
2069:                            }
2070:                        }
2071:
2072:                        if (!((node.tag.model & Dict.CM_TABLE) != 0)) {
2073:                            lexer.ungetToken();
2074:                            Report.warning(lexer, table, node,
2075:                                    Report.TAG_NOT_ALLOWED_IN);
2076:                            lexer.istackbase = istackbase;
2077:                            Node.trimEmptyElement(lexer, table);
2078:                            return;
2079:                        }
2080:
2081:                        if (node.type == Node.StartTag
2082:                                || node.type == Node.StartEndTag) {
2083:                            Node.insertNodeAtEnd(table, node);
2084:                            ;
2085:                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
2086:                            continue;
2087:                        }
2088:
2089:                        /* discard unexpected text nodes and end tags */
2090:                        Report.warning(lexer, table, node,
2091:                                Report.DISCARDING_UNEXPECTED);
2092:                    }
2093:
2094:                    Report.warning(lexer, table, node,
2095:                            Report.MISSING_ENDTAG_FOR);
2096:                    Node.trimEmptyElement(lexer, table);
2097:                    lexer.istackbase = istackbase;
2098:                }
2099:
2100:            };
2101:
2102:            public static class ParseColGroup implements  Parser {
2103:
2104:                public void parse(Lexer lexer, Node colgroup, short mode) {
2105:                    Node node, parent;
2106:                    TagTable tt = lexer.configuration.tt;
2107:
2108:                    if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
2109:                        return;
2110:
2111:                    while (true) {
2112:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
2113:                        if (node == null)
2114:                            break;
2115:                        if (node.tag == colgroup.tag
2116:                                && node.type == Node.EndTag) {
2117:                            colgroup.closed = true;
2118:                            return;
2119:                        }
2120:
2121:                        /* 
2122:                          if this is the end tag for an ancestor element
2123:                          then infer end tag for this element
2124:                         */
2125:                        if (node.type == Node.EndTag) {
2126:                            if (node.tag == tt.tagForm) {
2127:                                lexer.badForm = 1;
2128:                                Report.warning(lexer, colgroup, node,
2129:                                        Report.DISCARDING_UNEXPECTED);
2130:                                continue;
2131:                            }
2132:
2133:                            for (parent = colgroup.parent; parent != null; parent = parent.parent) {
2134:
2135:                                if (node.tag == parent.tag) {
2136:                                    lexer.ungetToken();
2137:                                    return;
2138:                                }
2139:                            }
2140:                        }
2141:
2142:                        if (node.type == Node.TextNode) {
2143:                            lexer.ungetToken();
2144:                            return;
2145:                        }
2146:
2147:                        /* deal with comments etc. */
2148:                        if (Node.insertMisc(colgroup, node))
2149:                            continue;
2150:
2151:                        /* discard unknown tags */
2152:                        if (node.tag == null) {
2153:                            Report.warning(lexer, colgroup, node,
2154:                                    Report.DISCARDING_UNEXPECTED);
2155:                            continue;
2156:                        }
2157:
2158:                        if (node.tag != tt.tagCol) {
2159:                            lexer.ungetToken();
2160:                            return;
2161:                        }
2162:
2163:                        if (node.type == Node.EndTag) {
2164:                            Report.warning(lexer, colgroup, node,
2165:                                    Report.DISCARDING_UNEXPECTED);
2166:                            continue;
2167:                        }
2168:
2169:                        /* node should be <COL> */
2170:                        Node.insertNodeAtEnd(colgroup, node);
2171:                        parseTag(lexer, node, Lexer.IgnoreWhitespace);
2172:                    }
2173:                }
2174:
2175:            };
2176:
2177:            public static class ParseRowGroup implements  Parser {
2178:
2179:                public void parse(Lexer lexer, Node rowgroup, short mode) {
2180:                    Node node, parent;
2181:                    TagTable tt = lexer.configuration.tt;
2182:
2183:                    if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
2184:                        return;
2185:
2186:                    while (true) {
2187:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
2188:                        if (node == null)
2189:                            break;
2190:                        if (node.tag == rowgroup.tag) {
2191:                            if (node.type == Node.EndTag) {
2192:                                rowgroup.closed = true;
2193:                                Node.trimEmptyElement(lexer, rowgroup);
2194:                                return;
2195:                            }
2196:
2197:                            lexer.ungetToken();
2198:                            return;
2199:                        }
2200:
2201:                        /* if </table> infer end tag */
2202:                        if (node.tag == tt.tagTable && node.type == Node.EndTag) {
2203:                            lexer.ungetToken();
2204:                            Node.trimEmptyElement(lexer, rowgroup);
2205:                            return;
2206:                        }
2207:
2208:                        /* deal with comments etc. */
2209:                        if (Node.insertMisc(rowgroup, node))
2210:                            continue;
2211:
2212:                        /* discard unknown tags */
2213:                        if (node.tag == null && node.type != Node.TextNode) {
2214:                            Report.warning(lexer, rowgroup, node,
2215:                                    Report.DISCARDING_UNEXPECTED);
2216:                            continue;
2217:                        }
2218:
2219:                        /*
2220:                          if TD or TH then infer <TR>
2221:                          if text or inline or block move before table
2222:                          if head content move to head
2223:                         */
2224:
2225:                        if (node.type != Node.EndTag) {
2226:                            if (node.tag == tt.tagTd || node.tag == tt.tagTh) {
2227:                                lexer.ungetToken();
2228:                                node = lexer.inferredTag("tr");
2229:                                Report.warning(lexer, rowgroup, node,
2230:                                        Report.MISSING_STARTTAG);
2231:                            } else if (node.type == Node.TextNode
2232:                                    || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) {
2233:                                Node.moveBeforeTable(rowgroup, node, tt);
2234:                                Report.warning(lexer, rowgroup, node,
2235:                                        Report.TAG_NOT_ALLOWED_IN);
2236:                                lexer.exiled = true;
2237:
2238:                                if (node.type != Node.TextNode)
2239:                                    parseTag(lexer, node,
2240:                                            Lexer.IgnoreWhitespace);
2241:
2242:                                lexer.exiled = false;
2243:                                continue;
2244:                            } else if ((node.tag.model & Dict.CM_HEAD) != 0) {
2245:                                Report.warning(lexer, rowgroup, node,
2246:                                        Report.TAG_NOT_ALLOWED_IN);
2247:                                moveToHead(lexer, rowgroup, node);
2248:                                continue;
2249:                            }
2250:                        }
2251:
2252:                        /* 
2253:                          if this is the end tag for ancestor element
2254:                          then infer end tag for this element
2255:                         */
2256:                        if (node.type == Node.EndTag) {
2257:                            if (node.tag == tt.tagForm) {
2258:                                lexer.badForm = 1;
2259:                                Report.warning(lexer, rowgroup, node,
2260:                                        Report.DISCARDING_UNEXPECTED);
2261:                                continue;
2262:                            }
2263:
2264:                            if (node.tag == tt.tagTr || node.tag == tt.tagTd
2265:                                    || node.tag == tt.tagTh) {
2266:                                Report.warning(lexer, rowgroup, node,
2267:                                        Report.DISCARDING_UNEXPECTED);
2268:                                continue;
2269:                            }
2270:
2271:                            for (parent = rowgroup.parent; parent != null; parent = parent.parent) {
2272:                                if (node.tag == parent.tag) {
2273:                                    lexer.ungetToken();
2274:                                    Node.trimEmptyElement(lexer, rowgroup);
2275:                                    return;
2276:                                }
2277:                            }
2278:                        }
2279:
2280:                        /*
2281:                          if THEAD, TFOOT or TBODY then implied end tag
2282:
2283:                         */
2284:                        if ((node.tag.model & Dict.CM_ROWGRP) != 0) {
2285:                            if (node.type != Node.EndTag)
2286:                                lexer.ungetToken();
2287:
2288:                            Node.trimEmptyElement(lexer, rowgroup);
2289:                            return;
2290:                        }
2291:
2292:                        if (node.type == Node.EndTag) {
2293:                            Report.warning(lexer, rowgroup, node,
2294:                                    Report.DISCARDING_UNEXPECTED);
2295:                            continue;
2296:                        }
2297:
2298:                        if (!(node.tag == tt.tagTr)) {
2299:                            node = lexer.inferredTag("tr");
2300:                            Report.warning(lexer, rowgroup, node,
2301:                                    Report.MISSING_STARTTAG);
2302:                            lexer.ungetToken();
2303:                        }
2304:
2305:                        /* node should be <TR> */
2306:                        Node.insertNodeAtEnd(rowgroup, node);
2307:                        parseTag(lexer, node, Lexer.IgnoreWhitespace);
2308:                    }
2309:
2310:                    Node.trimEmptyElement(lexer, rowgroup);
2311:                }
2312:
2313:            };
2314:
2315:            public static class ParseRow implements  Parser {
2316:
2317:                public void parse(Lexer lexer, Node row, short mode) {
2318:                    Node node, parent;
2319:                    boolean exclude_state;
2320:                    TagTable tt = lexer.configuration.tt;
2321:
2322:                    if ((row.tag.model & Dict.CM_EMPTY) != 0)
2323:                        return;
2324:
2325:                    while (true) {
2326:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
2327:                        if (node == null)
2328:                            break;
2329:                        if (node.tag == row.tag) {
2330:                            if (node.type == Node.EndTag) {
2331:                                row.closed = true;
2332:                                Node.fixEmptyRow(lexer, row);
2333:                                return;
2334:                            }
2335:
2336:                            lexer.ungetToken();
2337:                            Node.fixEmptyRow(lexer, row);
2338:                            return;
2339:                        }
2340:
2341:                        /* 
2342:                          if this is the end tag for an ancestor element
2343:                          then infer end tag for this element
2344:                         */
2345:                        if (node.type == Node.EndTag) {
2346:                            if (node.tag == tt.tagForm) {
2347:                                lexer.badForm = 1;
2348:                                Report.warning(lexer, row, node,
2349:                                        Report.DISCARDING_UNEXPECTED);
2350:                                continue;
2351:                            }
2352:
2353:                            if (node.tag == tt.tagTd || node.tag == tt.tagTh) {
2354:                                Report.warning(lexer, row, node,
2355:                                        Report.DISCARDING_UNEXPECTED);
2356:                                continue;
2357:                            }
2358:
2359:                            for (parent = row.parent; parent != null; parent = parent.parent) {
2360:                                if (node.tag == parent.tag) {
2361:                                    lexer.ungetToken();
2362:                                    Node.trimEmptyElement(lexer, row);
2363:                                    return;
2364:                                }
2365:                            }
2366:                        }
2367:
2368:                        /* deal with comments etc. */
2369:                        if (Node.insertMisc(row, node))
2370:                            continue;
2371:
2372:                        /* discard unknown tags */
2373:                        if (node.tag == null && node.type != Node.TextNode) {
2374:                            Report.warning(lexer, row, node,
2375:                                    Report.DISCARDING_UNEXPECTED);
2376:                            continue;
2377:                        }
2378:
2379:                        /* discard unexpected <table> element */
2380:                        if (node.tag == tt.tagTable) {
2381:                            Report.warning(lexer, row, node,
2382:                                    Report.DISCARDING_UNEXPECTED);
2383:                            continue;
2384:                        }
2385:
2386:                        /* THEAD, TFOOT or TBODY */
2387:                        if (node.tag != null
2388:                                && (node.tag.model & Dict.CM_ROWGRP) != 0) {
2389:                            lexer.ungetToken();
2390:                            Node.trimEmptyElement(lexer, row);
2391:                            return;
2392:                        }
2393:
2394:                        if (node.type == Node.EndTag) {
2395:                            Report.warning(lexer, row, node,
2396:                                    Report.DISCARDING_UNEXPECTED);
2397:                            continue;
2398:                        }
2399:
2400:                        /*
2401:                          if text or inline or block move before table
2402:                          if head content move to head
2403:                         */
2404:
2405:                        if (node.type != Node.EndTag) {
2406:                            if (node.tag == tt.tagForm) {
2407:                                lexer.ungetToken();
2408:                                node = lexer.inferredTag("td");
2409:                                Report.warning(lexer, row, node,
2410:                                        Report.MISSING_STARTTAG);
2411:                            } else if (node.type == Node.TextNode
2412:                                    || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) {
2413:                                Node.moveBeforeTable(row, node, tt);
2414:                                Report.warning(lexer, row, node,
2415:                                        Report.TAG_NOT_ALLOWED_IN);
2416:                                lexer.exiled = true;
2417:
2418:                                if (node.type != Node.TextNode)
2419:                                    parseTag(lexer, node,
2420:                                            Lexer.IgnoreWhitespace);
2421:
2422:                                lexer.exiled = false;
2423:                                continue;
2424:                            } else if ((node.tag.model & Dict.CM_HEAD) != 0) {
2425:                                Report.warning(lexer, row, node,
2426:                                        Report.TAG_NOT_ALLOWED_IN);
2427:                                moveToHead(lexer, row, node);
2428:                                continue;
2429:                            }
2430:                        }
2431:
2432:                        if (!(node.tag == tt.tagTd || node.tag == tt.tagTh)) {
2433:                            Report.warning(lexer, row, node,
2434:                                    Report.TAG_NOT_ALLOWED_IN);
2435:                            continue;
2436:                        }
2437:
2438:                        /* node should be <TD> or <TH> */
2439:                        Node.insertNodeAtEnd(row, node);
2440:                        exclude_state = lexer.excludeBlocks;
2441:                        lexer.excludeBlocks = false;
2442:                        parseTag(lexer, node, Lexer.IgnoreWhitespace);
2443:                        lexer.excludeBlocks = exclude_state;
2444:
2445:                        /* pop inline stack */
2446:
2447:                        while (lexer.istack.size() > lexer.istackbase)
2448:                            lexer.popInline(null);
2449:                    }
2450:
2451:                    Node.trimEmptyElement(lexer, row);
2452:                }
2453:
2454:            };
2455:
2456:            public static class ParseNoFrames implements  Parser {
2457:
2458:                public void parse(Lexer lexer, Node noframes, short mode) {
2459:                    Node node;
2460:                    boolean checkstack;
2461:                    TagTable tt = lexer.configuration.tt;
2462:
2463:                    lexer.badAccess |= Report.USING_NOFRAMES;
2464:                    mode = Lexer.IgnoreWhitespace;
2465:                    checkstack = true;
2466:
2467:                    while (true) {
2468:                        node = lexer.getToken(mode);
2469:                        if (node == null)
2470:                            break;
2471:                        if (node.tag == noframes.tag
2472:                                && node.type == Node.EndTag) {
2473:                            noframes.closed = true;
2474:                            Node.trimSpaces(lexer, noframes);
2475:                            return;
2476:                        }
2477:
2478:                        if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)) {
2479:                            Report.warning(lexer, noframes, node,
2480:                                    Report.MISSING_ENDTAG_BEFORE);
2481:                            Node.trimSpaces(lexer, noframes);
2482:                            lexer.ungetToken();
2483:                            return;
2484:                        }
2485:
2486:                        if (node.tag == tt.tagHtml) {
2487:                            if (node.type == Node.StartTag
2488:                                    || node.type == Node.StartEndTag)
2489:                                Report.warning(lexer, noframes, node,
2490:                                        Report.DISCARDING_UNEXPECTED);
2491:
2492:                            continue;
2493:                        }
2494:
2495:                        /* deal with comments etc. */
2496:                        if (Node.insertMisc(noframes, node))
2497:                            continue;
2498:
2499:                        if (node.tag == tt.tagBody
2500:                                && node.type == Node.StartTag) {
2501:                            Node.insertNodeAtEnd(noframes, node);
2502:                            parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2503:                            continue;
2504:                        }
2505:
2506:                        /* implicit body element inferred */
2507:                        if (node.type == Node.TextNode || node.tag != null) {
2508:                            lexer.ungetToken();
2509:                            node = lexer.inferredTag("body");
2510:                            if (lexer.configuration.XmlOut)
2511:                                Report.warning(lexer, noframes, node,
2512:                                        Report.INSERTING_TAG);
2513:                            Node.insertNodeAtEnd(noframes, node);
2514:                            parseTag(lexer, node, Lexer.IgnoreWhitespace /*MixedContent*/);
2515:                            continue;
2516:                        }
2517:                        /* discard unexpected end tags */
2518:                        Report.warning(lexer, noframes, node,
2519:                                Report.DISCARDING_UNEXPECTED);
2520:                    }
2521:
2522:                    Report.warning(lexer, noframes, node,
2523:                            Report.MISSING_ENDTAG_FOR);
2524:                }
2525:
2526:            };
2527:
2528:            public static class ParseSelect implements  Parser {
2529:
2530:                public void parse(Lexer lexer, Node field, short mode) {
2531:                    Node node;
2532:                    TagTable tt = lexer.configuration.tt;
2533:
2534:                    lexer.insert = -1; /* defer implicit inline start tags */
2535:
2536:                    while (true) {
2537:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
2538:                        if (node == null)
2539:                            break;
2540:                        if (node.tag == field.tag && node.type == Node.EndTag) {
2541:                            field.closed = true;
2542:                            Node.trimSpaces(lexer, field);
2543:                            return;
2544:                        }
2545:
2546:                        /* deal with comments etc. */
2547:                        if (Node.insertMisc(field, node))
2548:                            continue;
2549:
2550:                        if (node.type == Node.StartTag
2551:                                && (node.tag == tt.tagOption
2552:                                        || node.tag == tt.tagOptgroup || node.tag == tt.tagScript)) {
2553:                            Node.insertNodeAtEnd(field, node);
2554:                            parseTag(lexer, node, Lexer.IgnoreWhitespace);
2555:                            continue;
2556:                        }
2557:
2558:                        /* discard unexpected tags */
2559:                        Report.warning(lexer, field, node,
2560:                                Report.DISCARDING_UNEXPECTED);
2561:                    }
2562:
2563:                    Report.warning(lexer, field, node,
2564:                            Report.MISSING_ENDTAG_FOR);
2565:                }
2566:
2567:            };
2568:
2569:            public static class ParseText implements  Parser {
2570:
2571:                public void parse(Lexer lexer, Node field, short mode) {
2572:                    Node node;
2573:                    TagTable tt = lexer.configuration.tt;
2574:
2575:                    lexer.insert = -1; /* defer implicit inline start tags */
2576:
2577:                    if (field.tag == tt.tagTextarea)
2578:                        mode = Lexer.Preformatted;
2579:
2580:                    while (true) {
2581:                        node = lexer.getToken(mode);
2582:                        if (node == null)
2583:                            break;
2584:                        if (node.tag == field.tag && node.type == Node.EndTag) {
2585:                            field.closed = true;
2586:                            Node.trimSpaces(lexer, field);
2587:                            return;
2588:                        }
2589:
2590:                        /* deal with comments etc. */
2591:                        if (Node.insertMisc(field, node))
2592:                            continue;
2593:
2594:                        if (node.type == Node.TextNode) {
2595:                            /* only called for 1st child */
2596:                            if (field.content == null
2597:                                    && !((mode & Lexer.Preformatted) != 0))
2598:                                Node.trimSpaces(lexer, field);
2599:
2600:                            if (node.start >= node.end) {
2601:                                continue;
2602:                            }
2603:
2604:                            Node.insertNodeAtEnd(field, node);
2605:                            continue;
2606:                        }
2607:
2608:                        if (node.tag == tt.tagFont) {
2609:                            Report.warning(lexer, field, node,
2610:                                    Report.DISCARDING_UNEXPECTED);
2611:                            continue;
2612:                        }
2613:
2614:                        /* terminate element on other tags */
2615:                        if (!((field.tag.model & Dict.CM_OPT) != 0))
2616:                            Report.warning(lexer, field, node,
2617:                                    Report.MISSING_ENDTAG_BEFORE);
2618:
2619:                        lexer.ungetToken();
2620:                        Node.trimSpaces(lexer, field);
2621:                        return;
2622:                    }
2623:
2624:                    if (!((field.tag.model & Dict.CM_OPT) != 0))
2625:                        Report.warning(lexer, field, node,
2626:                                Report.MISSING_ENDTAG_FOR);
2627:                }
2628:
2629:            };
2630:
2631:            public static class ParseOptGroup implements  Parser {
2632:
2633:                public void parse(Lexer lexer, Node field, short mode) {
2634:                    Node node;
2635:                    TagTable tt = lexer.configuration.tt;
2636:
2637:                    lexer.insert = -1; /* defer implicit inline start tags */
2638:
2639:                    while (true) {
2640:                        node = lexer.getToken(Lexer.IgnoreWhitespace);
2641:                        if (node == null)
2642:                            break;
2643:                        if (node.tag == field.tag && node.type == Node.EndTag) {
2644:                            field.closed = true;
2645:                            Node.trimSpaces(lexer, field);
2646:                            return;
2647:                        }
2648:
2649:                        /* deal with comments etc. */
2650:                        if (Node.insertMisc(field, node))
2651:                            continue;
2652:
2653:                        if (node.type == Node.StartTag
2654:                                && (node.tag == tt.tagOption || node.tag == tt.tagOptgroup)) {
2655:                            if (node.tag == tt.tagOptgroup)
2656:                                Report.warning(lexer, field, node,
2657:                                        Report.CANT_BE_NESTED);
2658:
2659:                            Node.insertNodeAtEnd(field, node);
2660:                            parseTag(lexer, node, Lexer.MixedContent);
2661:                            continue;
2662:                        }
2663:
2664:                        /* discard unexpected tags */
2665:                        Report.warning(lexer, field, node,
2666:                                Report.DISCARDING_UNEXPECTED);
2667:                    }
2668:                }
2669:
2670:            };
2671:
2672:            public static Parser getParseHTML() {
2673:                return _parseHTML;
2674:            }
2675:
2676:            public static Parser getParseHead() {
2677:                return _parseHead;
2678:            }
2679:
2680:            public static Parser getParseTitle() {
2681:                return _parseTitle;
2682:            }
2683:
2684:            public static Parser getParseScript() {
2685:                return _parseScript;
2686:            }
2687:
2688:            public static Parser getParseBody() {
2689:                return _parseBody;
2690:            }
2691:
2692:            public static Parser getParseFrameSet() {
2693:                return _parseFrameSet;
2694:            }
2695:
2696:            public static Parser getParseInline() {
2697:                return _parseInline;
2698:            }
2699:
2700:            public static Parser getParseList() {
2701:                return _parseList;
2702:            }
2703:
2704:            public static Parser getParseDefList() {
2705:                return _parseDefList;
2706:            }
2707:
2708:            public static Parser getParsePre() {
2709:                return _parsePre;
2710:            }
2711:
2712:            public static Parser getParseBlock() {
2713:                return _parseBlock;
2714:            }
2715:
2716:            public static Parser getParseTableTag() {
2717:                return _parseTableTag;
2718:            }
2719:
2720:            public static Parser getParseColGroup() {
2721:                return _parseColGroup;
2722:            }
2723:
2724:            public static Parser getParseRowGroup() {
2725:                return _parseRowGroup;
2726:            }
2727:
2728:            public static Parser getParseRow() {
2729:                return _parseRow;
2730:            }
2731:
2732:            public static Parser getParseNoFrames() {
2733:                return _parseNoFrames;
2734:            }
2735:
2736:            public static Parser getParseSelect() {
2737:                return _parseSelect;
2738:            }
2739:
2740:            public static Parser getParseText() {
2741:                return _parseText;
2742:            }
2743:
2744:            public static Parser getParseOptGroup() {
2745:                return _parseOptGroup;
2746:            }
2747:
2748:            private static Parser _parseHTML = new ParseHTML();
2749:            private static Parser _parseHead = new ParseHead();
2750:            private static Parser _parseTitle = new ParseTitle();
2751:            private static Parser _parseScript = new ParseScript();
2752:            private static Parser _parseBody = new ParseBody();
2753:            private static Parser _parseFrameSet = new ParseFrameSet();
2754:            private static Parser _parseInline = new ParseInline();
2755:            private static Parser _parseList = new ParseList();
2756:            private static Parser _parseDefList = new ParseDefList();
2757:            private static Parser _parsePre = new ParsePre();
2758:            private static Parser _parseBlock = new ParseBlock();
2759:            private static Parser _parseTableTag = new ParseTableTag();
2760:            private static Parser _parseColGroup = new ParseColGroup();
2761:            private static Parser _parseRowGroup = new ParseRowGroup();
2762:            private static Parser _parseRow = new ParseRow();
2763:            private static Parser _parseNoFrames = new ParseNoFrames();
2764:            private static Parser _parseSelect = new ParseSelect();
2765:            private static Parser _parseText = new ParseText();
2766:            private static Parser _parseOptGroup = new ParseOptGroup();
2767:
2768:            /*
2769:              HTML is the top level element
2770:             */
2771:            public static Node parseDocument(Lexer lexer) {
2772:                Node node, document, html;
2773:                Node doctype = null;
2774:                TagTable tt = lexer.configuration.tt;
2775:
2776:                document = lexer.newNode();
2777:                document.type = Node.RootNode;
2778:
2779:                while (true) {
2780:                    node = lexer.getToken(Lexer.IgnoreWhitespace);
2781:                    if (node == null)
2782:                        break;
2783:
2784:                    /* deal with comments etc. */
2785:                    if (Node.insertMisc(document, node))
2786:                        continue;
2787:
2788:                    if (node.type == Node.DocTypeTag) {
2789:                        if (doctype == null) {
2790:                            Node.insertNodeAtEnd(document, node);
2791:                            doctype = node;
2792:                        } else
2793:                            Report.warning(lexer, document, node,
2794:                                    Report.DISCARDING_UNEXPECTED);
2795:                        continue;
2796:                    }
2797:
2798:                    if (node.type == Node.EndTag) {
2799:                        Report.warning(lexer, document, node,
2800:                                Report.DISCARDING_UNEXPECTED); //TODO?
2801:                        continue;
2802:                    }
2803:
2804:                    if (node.type != Node.StartTag || node.tag != tt.tagHtml) {
2805:                        lexer.ungetToken();
2806:                        html = lexer.inferredTag("html");
2807:                    } else
2808:                        html = node;
2809:
2810:                    Node.insertNodeAtEnd(document, html);
2811:                    getParseHTML().parse(lexer, html, (short) 0); // TODO?
2812:                    break;
2813:                }
2814:
2815:                return document;
2816:            }
2817:
2818:            /**
2819:             *  Indicates whether or not whitespace should be preserved for this element.
2820:             *  If an <code>xml:space</code> attribute is found, then if the attribute value is
2821:             *  <code>preserve</code>, returns <code>true</code>.  For any other value, returns
2822:             *  <code>false</code>.  If an <code>xml:space</code> attribute was <em>not</em>
2823:             *  found, then the following element names result in a return value of <code>true:
2824:             *  pre, script, style,</code> and <code>xsl:text</code>.  Finally, if a
2825:             *  <code>TagTable</code> was passed in and the element appears as the "pre" element
2826:             *  in the <code>TagTable</code>, then <code>true</code> will be returned.
2827:             *  Otherwise, <code>false</code> is returned.
2828:             *  @param element The <code>Node</code> to test to see if whitespace should be
2829:             *                 preserved.
2830:             *  @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code>
2831:             *            function.  This may be <code>null</code>, in which case this test
2832:             *            is bypassed.
2833:             *  @return <code>true</code> or <code>false</code>, as explained above.
2834:             */
2835:
2836:            public static boolean XMLPreserveWhiteSpace(Node element,
2837:                    TagTable tt) {
2838:                AttVal attribute;
2839:
2840:                /* search attributes for xml:space */
2841:                for (attribute = element.attributes; attribute != null; attribute = attribute.next) {
2842:                    if (attribute.attribute.equals("xml:space")) {
2843:                        if (attribute.value.equals("preserve"))
2844:                            return true;
2845:
2846:                        return false;
2847:                    }
2848:                }
2849:
2850:                /* kludge for html docs without explicit xml:space attribute */
2851:                if (Lexer.wstrcasecmp(element.element, "pre") == 0
2852:                        || Lexer.wstrcasecmp(element.element, "script") == 0
2853:                        || Lexer.wstrcasecmp(element.element, "style") == 0)
2854:                    return true;
2855:
2856:                if ((tt != null) && (tt.findParser(element) == getParsePre()))
2857:                    return true;
2858:
2859:                /* kludge for XSL docs */
2860:                if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
2861:                    return true;
2862:
2863:                return false;
2864:            }
2865:
2866:            /*
2867:              XML documents
2868:             */
2869:            public static void parseXMLElement(Lexer lexer, Node element,
2870:                    short mode) {
2871:                Node node;
2872:
2873:                /* Jeff Young's kludge for XSL docs */
2874:
2875:                if (Lexer.wstrcasecmp(element.element, "xsl:text") == 0)
2876:                    return;
2877:
2878:                /* if node is pre or has xml:space="preserve" then do so */
2879:
2880:                if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
2881:                    mode = Lexer.Preformatted;
2882:
2883:                while (true) {
2884:                    node = lexer.getToken(mode);
2885:                    if (node == null)
2886:                        break;
2887:                    if (node.type == Node.EndTag
2888:                            && node.element.equals(element.element)) {
2889:                        element.closed = true;
2890:                        break;
2891:                    }
2892:
2893:                    /* discard unexpected end tags */
2894:                    if (node.type == Node.EndTag) {
2895:                        Report.error(lexer, element, node,
2896:                                Report.UNEXPECTED_ENDTAG);
2897:                        continue;
2898:                    }
2899:
2900:                    /* parse content on seeing start tag */
2901:                    if (node.type == Node.StartTag)
2902:                        parseXMLElement(lexer, node, mode);
2903:
2904:                    Node.insertNodeAtEnd(element, node);
2905:                }
2906:
2907:                /*
2908:                 if first child is text then trim initial space and
2909:                 delete text node if it is empty.
2910:                 */
2911:
2912:                node = element.content;
2913:
2914:                if (node != null && node.type == Node.TextNode
2915:                        && mode != Lexer.Preformatted) {
2916:                    if (node.textarray[node.start] == (byte) ' ') {
2917:                        node.start++;
2918:
2919:                        if (node.start >= node.end)
2920:                            Node.discardElement(node);
2921:                    }
2922:                }
2923:
2924:                /*
2925:                 if last child is text then trim final space and
2926:                 delete the text node if it is empty
2927:                 */
2928:
2929:                node = element.last;
2930:
2931:                if (node != null && node.type == Node.TextNode
2932:                        && mode != Lexer.Preformatted) {
2933:                    if (node.textarray[node.end - 1] == (byte) ' ') {
2934:                        node.end--;
2935:
2936:                        if (node.start >= node.end)
2937:                            Node.discardElement(node);
2938:                    }
2939:                }
2940:            }
2941:
2942:            public static Node parseXMLDocument(Lexer lexer) {
2943:                Node node, document, doctype;
2944:
2945:                document = lexer.newNode();
2946:                document.type = Node.RootNode;
2947:                doctype = null;
2948:                lexer.configuration.XmlTags = true;
2949:
2950:                while (true) {
2951:                    node = lexer.getToken(Lexer.IgnoreWhitespace);
2952:                    if (node == null)
2953:                        break;
2954:                    /* discard unexpected end tags */
2955:                    if (node.type == Node.EndTag) {
2956:                        Report.warning(lexer, null, node,
2957:                                Report.UNEXPECTED_ENDTAG);
2958:                        continue;
2959:                    }
2960:
2961:                    /* deal with comments etc. */
2962:                    if (Node.insertMisc(document, node))
2963:                        continue;
2964:
2965:                    if (node.type == Node.DocTypeTag) {
2966:                        if (doctype == null) {
2967:                            Node.insertNodeAtEnd(document, node);
2968:                            doctype = node;
2969:                        } else
2970:                            Report.warning(lexer, document, node,
2971:                                    Report.DISCARDING_UNEXPECTED); // TODO
2972:                        continue;
2973:                    }
2974:
2975:                    /* if start tag then parse element's content */
2976:                    if (node.type == Node.StartTag) {
2977:                        Node.insertNodeAtEnd(document, node);
2978:                        parseXMLElement(lexer, node, Lexer.IgnoreWhitespace);
2979:                    }
2980:
2981:                }
2982:
2983:                if (false) { //#if 0
2984:                    /* discard the document type */
2985:                    node = document.findDocType();
2986:
2987:                    if (node != null)
2988:                        Node.discardElement(node);
2989:                } // #endif
2990:
2991:                if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
2992:                    Report.warning(lexer, doctype, null,
2993:                            Report.DTYPE_NOT_UPPER_CASE);
2994:
2995:                /* ensure presence of initial <?XML version="1.0"?> */
2996:                if (lexer.configuration.XmlPi)
2997:                    lexer.fixXMLPI(document);
2998:
2999:                return document;
3000:            }
3001:
3002:            public static boolean isJavaScript(Node node) {
3003:                boolean result = false;
3004:                AttVal attr;
3005:
3006:                if (node.attributes == null)
3007:                    return true;
3008:
3009:                for (attr = node.attributes; attr != null; attr = attr.next) {
3010:                    if ((Lexer.wstrcasecmp(attr.attribute, "language") == 0 || Lexer
3011:                            .wstrcasecmp(attr.attribute, "type") == 0)
3012:                            && Lexer.wsubstr(attr.value, "javascript"))
3013:                        result = true;
3014:                }
3015:
3016:                return result;
3017:            }
3018:
3019:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.