Source Code Cross Referenced for Parser.java in » Apache-Harmony-Java-SE » javax-package » javax » swing » text » html » parser » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Apache Harmony Java SE » javax package » javax.swing.text.html.parser
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         *  Licensed to the Apache Software Foundation (ASF) under one or more
0003:         *  contributor license agreements.  See the NOTICE file distributed with
0004:         *  this work for additional information regarding copyright ownership.
0005:         *  The ASF licenses this file to You under the Apache License, Version 2.0
0006:         *  (the "License"); you may not use this file except in compliance with
0007:         *  the License.  You may obtain a copy of the License at
0008:         *
0009:         *     http://www.apache.org/licenses/LICENSE-2.0
0010:         *
0011:         *  Unless required by applicable law or agreed to in writing, software
0012:         *  distributed under the License is distributed on an "AS IS" BASIS,
0013:         *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014:         *  See the License for the specific language governing permissions and
0015:         *  limitations under the License.
0016:         */
0017:        package javax.swing.text.html.parser;
0018:
0019:        import java.io.IOException;
0020:        import java.io.Reader;
0021:        import java.util.BitSet;
0022:        import java.util.Enumeration;
0023:        import java.util.LinkedList;
0024:        import java.util.List;
0025:
0026:        import javax.swing.text.ChangedCharSetException;
0027:        import javax.swing.text.MutableAttributeSet;
0028:        import javax.swing.text.SimpleAttributeSet;
0029:        import javax.swing.text.html.HTML;
0030:        import javax.swing.text.html.HTML.Tag;
0031:        import javax.swing.tree.DefaultMutableTreeNode;
0032:
0033:        import org.apache.harmony.x.swing.internal.nls.Messages;
0034:
0035:        /**
0036:         * This class attempts to read and parse an HTML file, which it gets via a
0037:         * {@link Reader}. The parsing is based on a Document Type Definition
0038:         * ({@link DTD}), and calls various methods (such as handleError,
0039:         * handleStartTag, etc.) when it finds tags or data. These methods should be
0040:         * overridden in a subclass in order to use the parser.
0041:         */
0042:        public class Parser implements  DTDConstants {
0043:
0044:            /**
0045:             * The handler that runs the CUP parser and receives its callbacks.
0046:             */
0047:            private ParserHandler handler;
0048:
0049:            /**
0050:             * The current attribute set: it is replaced by
0051:             * {@link Parser#flushAttributes()} and returned by
0052:             * {@link Parser#getAttributes()}.
0053:             */
0054:            private SimpleAttributeSet attributes;
0055:
0056:            /**
0057:             * The start offset of the token that is currently being processed.
0058:             */
0059:            private int currentStartPos;
0060:
0061:            /**
0062:             * The end position of the tag that is currently being processed.
0063:             */
0064:            private int currentEndPos;
0065:
0066:            /**
0067:             * The current line, where the document is being parsed.
0068:             */
0069:            private int currentLine;
0070:
0071:            /**
0072:             * The actual {@link DTD} used to parse the document.
0073:             */
0074:            protected DTD dtd;
0075:
0076:            /**
0077:             * Defines whether the parsing of the document is strict or not.
0078:             */
0079:            protected boolean strict;
0080:
0081:            /**
0082:             * The keyword that identifies a markup declaration in which the document
0083:             * type is defined.
0084:             */
0085:            private final String DOCTYPE_DECL = "DOCTYPE";
0086:
0087:            /**
0088:             * The Reader most recently handed to {@link Parser#parse(Reader)}.
0089:             */
0090:            private Reader file;
0091:
0092:            /**
0093:             * The last markup declaration reported to the handler.
0094:             */
0095:            private HTMLMarkup LastMarkupDecl;
0096:
0097:            /**
0098:             * Indicates whether the HTMLTagType of the current tag is
0099:             * {@link HTMLTagType#SIMPLE}. It is used to give the same behaviour as the RI
0100:             * in the methods {@link Parser#startTag(TagElement)} and
0101:             * {@link Parser#endTag(boolean)}
0102:             */
0103:            private boolean isCurrentTagSimple;
0104:
0105:            /*
0106:             * ********************************************************************
0107:             * Public and Protected Methods/Constructor
0108:             * ********************************************************************
0109:             */
0110:
0111:            /**
0112:             * Constructs a new {@link Parser} for a {@link DTD}; the attribute set
0113:             * starts empty (not null) so it is usable before the first tag is parsed.
0114:             * @param dtd the {@link DTD} where the information is stored.
0115:             */
0116:            public Parser(final DTD dtd) {
0117:                this.dtd = dtd;
0118:                attributes = new SimpleAttributeSet();
0119:                handler = new ParserHandlerImpl();
0120:            }
0121:
0122:            /**
0123:             * Not implemented: always throws {@link UnsupportedOperationException}.
0124:             * @param omitted determines whether the end tag may be omitted or not.
0125:             */
0126:            protected void endTag(final boolean omitted) {
0127:                throw new UnsupportedOperationException(Messages
0128:                        .getString("swing.27")); //$NON-NLS-1$
0129:                // XXX: Perhaps depending on the boolean value, an endtag.missing
0130:                // error may be thrown
0131:                //handleEndTag(currentTag);
0132:            }
0133:
0134:            /**
0135:             * Reports an error message with only one information field.
0136:             * 
0137:             * @param err the error message.
0138:             */
0139:            protected void error(final String err) {
0140:                error(err, HTMLErrorType.DEF_ERROR.toString(),
0141:                        HTMLErrorType.DEF_ERROR.toString(),
0142:                        HTMLErrorType.DEF_ERROR.toString());
0143:            }
0144:
0145:            private void error(final HTMLErrorType errorType) {
0146:                error(errorType.toString(), HTMLErrorType.DEF_ERROR.toString(),
0147:                        HTMLErrorType.DEF_ERROR.toString(),
0148:                        HTMLErrorType.DEF_ERROR.toString());
0149:            }
0150:
0151:            /**
0152:             * Reports an error message with two information field
0153:             *  
0154:             * @param err the first part of the message.
0155:             * @param arg1 the second part of the message.
0156:             */
0157:            protected void error(final String err, final String arg1) {
0158:                error(err, arg1, HTMLErrorType.DEF_ERROR.toString(),
0159:                        HTMLErrorType.DEF_ERROR.toString());
0160:            }
0161:
0162:            /**
0163:             * Reports an error message with two information field
0164:             *  
0165:             * @param errorType the type of the error.
0166:             * @param arg1 the second part of the message.
0167:             */
0168:            private void error(final HTMLErrorType errorType, final String arg1) {
0169:                error(errorType.toString(), arg1, HTMLErrorType.DEF_ERROR
0170:                        .toString(), HTMLErrorType.DEF_ERROR.toString());
0171:            }
0172:
0173:            /**
0174:             * Reports an error message with three information field
0175:             * 
0176:             * @param err the first part of the message.
0177:             * @param arg1 the second part of the message.
0178:             * @param arg2 the third part of the message.
0179:             */
0180:            protected void error(final String err, final String arg1,
0181:                    final String arg2) {
0182:                error(err, arg1, arg2, HTMLErrorType.DEF_ERROR.toString());
0183:            }
0184:
0185:            /**
0186:             * Reports an error message with three information field
0187:             * 
0188:             * @param errorType The type of the error.
0189:             * @param arg1 the first part of the message.
0190:             * @param arg2 the second part of the message.
0191:             */
0192:            private void error(final HTMLErrorType errorType,
0193:                    final String arg1, final String arg2) {
0194:                error(errorType.toString(), arg1, arg2, HTMLErrorType.DEF_ERROR
0195:                        .toString());
0196:            }
0197:
0198:            /**
0199:             * Reports an error message with four information fields by passing their
0200:             * concatenation, together with the current start position, to handleError.
0201:             * @param err the first part of the message.
0202:             * @param arg1 the second part of the message.
0203:             * @param arg2 the third part of the message.
0204:             * @param arg3 the fourth part of the message.
0205:             */
0206:            protected void error(final String err, final String arg1,
0207:                    final String arg2, final String arg3) {
0208:                handleError(currentStartPos, err + arg1 + arg2 + arg3); // NOTE(review): handleError documents a line number - confirm
0209:            }
0210:
0211:            /**
0212:             * Reports an error of the given type with three information fields.
0213:             * The type is converted with toString() and used as the first message part.
0214:             * @param errorType The type of the error.
0215:             * @param arg1 the first part of the message.
0216:             * @param arg2 the second part of the message.
0217:             * @param arg3 the third part of the message.
0218:             */
0219:            private void error(final HTMLErrorType errorType,
0220:                    final String arg1, final String arg2, final String arg3) {
0221:                error(errorType.toString(), arg1, arg2, arg3);
0222:            }
0223:
0224:            /**
0225:             * Discards the current attributes by replacing them with a new, empty
0226:             * {@link SimpleAttributeSet}.
0227:             */
0228:            protected void flushAttributes() {
0229:                attributes = new SimpleAttributeSet();
0230:            }
0231:
0232:            /**
0233:             * Returns the current attribute set.
0234:             * 
0235:             * @return the attribute set installed by the last flushAttributes() call.
0236:             */
0237:            protected SimpleAttributeSet getAttributes() {
0238:                return attributes;
0239:            }
0240:
0241:            /**
0242:             * Reports the line number recorded for the most recently handled token.
0243:             * 
0244:             * @return the current line number in the document.
0245:             */
0246:            protected int getCurrentLine() {
0247:                return currentLine;
0248:            }
0249:
0250:            /**
0251:             * Reports the start position of the token that is being parsed.
0252:             * 
0253:             * @return the current start position in the parsed file.
0254:             */
0255:            protected int getCurrentPos() {
0256:                return currentStartPos;
0257:            }
0258:
0259:            /**
0260:             * Called when a comment is found in the parsed file; does nothing here.
0261:             * 
0262:             * @param text the text found as comment.
0263:             */
0264:            protected void handleComment(final char[] text) {
0265:            }
0266:
0267:            /**
0268:             * Called when a simple or empty tag is found in the parsed file.
0269:             * This implementation does nothing.
0270:             * 
0271:             * @param tag the {@link TagElement} that contains the information of the
0272:             *            parsed tag.
0273:             * @throws ChangedCharSetException declared for overriding implementations.
0274:             */
0275:            protected void handleEmptyTag(final TagElement tag)
0276:                    throws ChangedCharSetException {
0277:            }
0278:
0279:            /**
0280:             * Called when a closing tag is found in the parsed file; does nothing here.
0281:             * 
0282:             * @param tag the {@link TagElement} that contains the information of the
0283:             *            parsed closing tag.
0284:             */
0285:            protected void handleEndTag(final TagElement tag) {
0286:            }
0287:
0288:            /**
0289:             * Meant for the case where the end of the parsed file is found inside
0290:             * a comment. Not implemented: it always throws
0291:             * {@link UnsupportedOperationException}.
0292:             */
0293:            protected void handleEOFInComment() {
0294:                throw new UnsupportedOperationException(Messages
0295:                        .getString("swing.9F")); //$NON-NLS-1$
0296:            }
0297:
0298:            /**
0299:             * Called when an error is found in the parsed file; does nothing here.
0300:             * 
0301:             * @param ln the line number where the error was found.
0302:             * @param msg an appropriate message for the found error.
0303:             */
0304:            protected void handleError(final int ln, final String msg) {
0305:
0306:            }
0307:
0308:            /**
0309:             * Called when an opening tag, that is not simple or empty, is found in
0310:             * the parsed file. This implementation does nothing.
0311:             * 
0312:             * @param tag the {@link TagElement} that contains the information of the
0313:             *            parsed opening tag.
0314:             */
0315:            protected void handleStartTag(final TagElement tag) {
0316:            }
0317:
0318:            /**
0319:             * Called when a piece of text is found in the parsed file; does nothing here.
0320:             * 
0321:             * @param text the piece of text found in the document.
0322:             */
0323:            protected void handleText(final char[] text) {
0324:            }
0325:
0326:            /**
0327:             * Called when a title is found in the parsed file; does nothing here.
0328:             * 
0329:             * @param text the piece of text found as part of the title of the parsed
0330:             *             file.
0331:             */
0332:            protected void handleTitle(final char[] text) {
0333:            }
0334:
0335:            /**
0336:             * Constructs a new {@link TagElement} with the information stored in an
0337:             * {@link Element}.
0338:             * 
0339:             * @param elem the {@link Element} that contains the information.
0340:             * @return a new {@link TagElement} that encapsulates the {@link Element}
0341:             *         received as argument, built with the one-argument constructor.
0342:             */
0343:            protected TagElement makeTag(final Element elem) {
0344:                return new TagElement(elem);
0345:            }
0346:
0347:            /**
0348:             * Constructs a new {@link TagElement} with the information stored in an
0349:             * {@link Element}.
0350:             * 
0351:             * @param elem the {@link Element} that contains the information.
0352:             * @param fictional the value passed on as the fictional flag of the
0353:             * {@link TagElement}.
0354:             * @return a new {@link TagElement} that encapsulates the {@link Element}
0355:             *         received as argument.
0356:             */
0357:            protected TagElement makeTag(final Element elem,
0358:                    final boolean fictional) {
0359:                return new TagElement(elem, fictional);
0360:            }
0361:
0362:            /**
0363:             * Meant to mark the first occurrence of an element inside a document.
0364:             * Not implemented: always throws {@link UnsupportedOperationException}.
0365:             * @param elem the {@link Element} whose first occurrence is to be marked.
0366:             */
0367:            protected void markFirstTime(final Element elem) {
0368:                // TODO review this
0369:                throw new UnsupportedOperationException(Messages
0370:                        .getString("swing.9F")); //$NON-NLS-1$
0371:            }
0372:
0373:            /**
0374:             * Parses an HTML document. <br>
0375:             * During the parsing process, this method invokes the handlers for text,
0376:             * tags, comments, ... that this class possesses. In this way, the user may be
0377:             * notified about all the information found in the parsed file.
0378:             *  
0379:             * @param in the reader from which the document to be parsed is read;
0380:             *           a null value causes a NullPointerException.
0381:             * @throws IOException if the handler's parse of the reader fails.
0382:             */
0383:            public synchronized void parse(final Reader in) throws IOException {
0384:                if (in == null) {
0385:                    // same as RI
0386:                    throw new NullPointerException();
0387:                }
0388:                file = in;
0389:                handler.parse(file);
0390:            }
0391:
0392:            /**
0393:             * Returns the text of the last parsed markup declaration: the declaration
0394:             * without its leading DOCTYPE-keyword-length prefix, followed by its content.
0395:             * @return the information stored in the last parsed markup declaration.
0396:             * @throws IOException if querying the reader fails.
0397:             */
0398:            public String parseDTDMarkup() throws IOException {
0399:                file.ready(); // To satisfy RI behavior
0400:                final String declaration = LastMarkupDecl.getDeclaration();
0401:                final String afterKeyword = declaration.substring(
0402:                        DOCTYPE_DECL.length(), declaration.length());
0403:                return afterKeyword + LastMarkupDecl.getContent();
0404:            }
0405:
0406:            protected boolean parseMarkupDeclarations(final StringBuffer strBuff)
0407:                    throws IOException {
0408:                boolean isDeclaration = strBuff.toString().toUpperCase()
0409:                        .startsWith(DOCTYPE_DECL);
0410:                if (isDeclaration) {
0411:                    parseDTDMarkup();
0412:                }
0413:                return isDeclaration;
0414:            }
0415:
0416:            protected void startTag(final TagElement tag)
0417:                    throws ChangedCharSetException {
0418:                // Ordinary opening tags go to handleStartTag; tags flagged as simple
0419:                // are reported through handleEmptyTag instead.
0420:                if (!isCurrentTagSimple) {
0421:                    handleStartTag(tag);
0422:                    return;
0423:                }
0424:                handleEmptyTag(tag);
0425:            }
0426:
0427:            // Invoked by DocumentParser; returns the end-of-line string reported by the handler
0428:            String getEOLString() {
0429:                return handler.getEOLString();
0430:            }
0431:
0432:            /*
0433:             * ********************************************************************
0434:             * Inner class
0435:             * ********************************************************************
0436:             */
0437:
0438:            class ParserHandlerImpl implements  ParserHandler {
0439:
0440:                /**
0441:                 * The CUP parser used to parse a file.
0442:                 */
0443:                private ParserCup cup;
0444:
0445:                /**
0446:                 * The lexer generated from the JFlex specification.
0447:                 */
0448:                private Lexer lexer;
0449:
0450:                /**
0451:                 * The last text received from the lexer that has not been flushed yet.
0452:                 */
0453:                private HTMLText htmlText2flush;
0454:
0455:                boolean trailingSpaceAppended; // NOTE(review): usage is not visible in this part of the file
0456:
0457:                /**
0458:                 * Defines the maximum depth searched into a {@link ContentModel} when
0459:                 * looking for an implication chain of {@link Element}s.
0460:                 */
0461:                private static final int MAX_DEPTH = 2;
0462:
0463:                private static final String CLASS_ATTR = "class"; // NOTE(review): presumably an attribute name - confirm
0464:
0465:                /**
0466:                 * Used in order to handle any text as a comment while the parser is
0467:                 * inside a script tag (text is sent to handleComment while it is positive).
0468:                 */
0469:                private int scriptDepth;
0470:
0471:                /**
0472:                 * The actual element in the parsing tree.
0473:                 */
0474:                private DefaultMutableTreeNode currentElemNode;
0475:
0476:                private DefaultMutableTreeNode lastElemSeen; // node wrapping the last reported simple tag
0477:
0478:                public ParserHandlerImpl() {
0479:                    // TODO COMPLETE ME ? (nothing is initialized here; parse() builds the lexer and CUP parser)
0480:                }
0481:
0482:                /**
0483:                 * Wires a new lexer and CUP parser to the reader and runs the parse,
0484:                 * then flushes the pending text and reports the remaining elements.
0485:                 *
0486:                 * @param in the Reader to be parsed.
0487:                 * @throws IOException a ChangedCharSetException is rethrown as is; other
0488:                 *             failures, except ClassCastException, are wrapped as the cause.
0489:                 */
0490:                public void parse(Reader in) throws IOException {
0491:                    lexer = new Lexer(in);
0492:                    lexer.setDTD(dtd);
0493:                    cup = new ParserCup(lexer);
0494:                    cup.setCallBack(this);
0495:                    lexer.setCup(cup);
0496:                    lexer.setStrict(strict);
0497:                    try {
0498:                        cup.parse();
0499:                        flushHtmlText(true);
0500:                        reportRemainingElements();
0501:                    } catch (ClassCastException e) {
0502:                        cup.done_parsing(); // NOTE(review): the exception only ends the parse - confirm intent
0503:                    } catch (ChangedCharSetException e) {
0504:                        throw e;
0505:                    } catch (Exception e) {
0506:                        // FIXME : CALL HANDLE ERROR HERE ?
0507:                        throw (IOException) new IOException(e.toString()).initCause(e);
0508:                    }
0509:                }
0510:
0511:                /*
0512:                 * ********************************************************************
0513:                 * handle methods
0514:                 * ********************************************************************
0515:                 */
0516:
0517:                /**
0518:                 * This method is called when the lexer finds a token that looks like
0519:                 * an opening tag.
0520:                 * <BR>
0521:                 * Among other things, this method analyzes:
0522:                 * <ol>
0523:                 * <li> If the name of the found tag is a valid one or not.
0524:                 * <li> If all of its attributes are valid, or there are some of them
0525:                 * that were not part of the pre-established attributes for a specific
0526:                 * tag.
0527:                 * <li> The tag type. This means whether it is a simple (empty) one, or
0528:                 * a common opening tag (that may require a matching closing tag).
0529:                 * </ol>
0530:                 * The current line is the tag's line plus one, as for end tags and comments.
0531:                 * @param htmlTag a {@link HTMLTag} element that contains all the
0532:                 * information referring to the tag found in the document.
0533:                 * @throws ChangedCharSetException if a tag handler throws it.
0534:                 */
0535:                public void iHaveNewStartTag(HTMLTag htmlTag)
0536:                        throws ChangedCharSetException {
0537:                    flushAttributes();
0538:                    currentLine = htmlTag.getLine() + 1;
0539:                    currentStartPos = htmlTag.getOffset();
0540:                    currentEndPos = htmlTag.getEndPos();
0541:
0542:                    String tagName = htmlTag.getName();
0543:
0544:                    Element element = dtd.elementHash.get(tagName);
0545:
0546:                    TagElement currentTag;
0547:                    HTMLTagType tagType;
0548:                    if (element != null) {
0549:                        tagType = getType(htmlTag, element);
0550:                    } else {
0551:                        element = new Element(-1, tagName, false, false, null,
0552:                                null, -1, null, null, null);
0553:                        handleUnrecognizedError(htmlTag);
0554:                        /* Before any flush of strInfo we report the error (same as RI) */
0555:                        tagType = HTMLTagType.SIMPLE;
0556:                    }
0557:                    handleUnsupportedJavaScript(element);
0558:
0559:                    /* flush text */
0560:                    currentTag = new TagElement(element);
0561:                    boolean breaksFlowAfter = currentTag.breaksFlow();
0562:                    flushHtmlText(breaksFlowAfter);
0563:
0564:                    /* handle attributes */
0565:                    handleTagAttr(htmlTag, element);
0566:
0567:                    /* imply */
0568:                    boolean mustBeReported = manageStartElement(element,
0569:                            htmlTag.getEndPos());
0570:
0571:                    if (mustBeReported) {
0572:                        if (tagType == HTMLTagType.START) {
0573:                            isCurrentTagSimple = false;
0574:                        } else if (tagType == HTMLTagType.SIMPLE) {
0575:                            isCurrentTagSimple = true;
0576:                            lastElemSeen = new DefaultMutableTreeNode(
0577:                                    currentTag);
0578:                        } else {
0579:                            // this should not happen
0580:                            throw new AssertionError();
0581:                        }
0582:                        startTag(currentTag);
0583:                    }
0584:                    if (isCurrentTagSimple) {
0585:                        flushAttributes();
0586:                        levelUp();
0587:                    }
0588:                }
0589:
0590:                /**
0591:                 * This method is called by the lexer, when a token that looks like
0592:                 * a <em>Markup Declaration</em> is found in the stream being parsed.
0593:                 * Pending text is flushed and the declaration is remembered before parsing.
0594:                 * @param htmlMarkup a {@link HTMLMarkup} element that contains all
0595:                 * the information needed to manage a Markup Declaration.
0596:                 * @throws IOException if parsing the markup declaration fails.
0597:                 */
0598:                public void iHaveNewMarkup(HTMLMarkup htmlMarkup)
0599:                        throws IOException {
0600:                    flushHtmlText(true);
0601:                    currentStartPos = htmlMarkup.getOffset();
0602:                    LastMarkupDecl = htmlMarkup;
0603:                    parseMarkupDeclarations(new StringBuffer(htmlMarkup
0604:                            .getDeclaration()));
0605:                }
0606:
0607:                /**
0608:                 * This method is called by the lexer, when a token that looks like
0609:                 * a closing tag is found in the parsed stream.
0610:                 * An unknown tag name is reported as an error and as an empty tag.
0611:                 * @param htmlTag a {@link HTMLTag} element that contains all the
0612:                 * information related to the closing tag, that was found in the parsed
0613:                 * stream.
0614:                 * 
0615:                 */
0616:                public void iHaveNewEndTag(final HTMLTag htmlTag) {
0617:                    currentLine = htmlTag.getLine() + 1;
0618:                    String tagName = htmlTag.getName();
0619:                    Element element = dtd.elementHash.get(tagName);
0620:                    TagElement currentTag;
0621:                    if (element != null) {
0622:                        currentTag = new TagElement(element);
0623:                        flushHtmlText(currentTag.breaksFlow());
0624:                    } else {
0625:                        handleUnrecognizedError(htmlTag); // the error is reported before any text is flushed
0626:                        element = new Element(-1, tagName, false, false, null,
0627:                                null, -1, null, null, null);
0628:                        currentTag = new TagElement(element);
0629:                        flushHtmlText(currentTag.breaksFlow());
0630:                        try {
0631:                            attributes.addAttribute("endtag", Boolean.TRUE); // marks the empty tag as an end tag
0632:                            handleEmptyTag(currentTag);
0633:                        } catch (ChangedCharSetException e) {
0634:                            // this shouldn't happen
0635:                            throw new AssertionError();
0636:                        }
0637:                    }
0638:
0639:                    currentStartPos = htmlTag.getOffset();
0640:                    currentEndPos = htmlTag.getEndPos();
0641:
0642:                    boolean mustBeReported = manageEndElement(element);
0643:
0644:                    if (mustBeReported) {
0645:                        handleEndTag(currentTag);
0646:                    }
0647:                }
0648:
0649:                /**
0650:                 * Invoked by the lexer whenever a new piece of text is found in the
0651:                 * parsed stream. While the script depth is positive the text is reported
0652:                 * as a comment; otherwise it is kept to be flushed later.
0653:                 * @param htmlText a {@link HTMLText} element that contains all the
0654:                 * information related to the piece of text found in the stream.
0655:                 */
0656:                public void iHaveNewText(final HTMLText htmlText) {
0657:                    // text that is still pending goes out before the new piece is handled
0658:                    flushHtmlText(false);
0659:                    currentStartPos = htmlText.getOffset();
0660:                    if (scriptDepth <= 0) {
0661:                        // ordinary text: remember it for a later flush
0662:                        htmlText2flush = htmlText;
0663:                        return;
0664:                    }
0665:                    handleComment(getText(htmlText, false, false));
0666:                }
0667:
0668:                /**
0669:                 * This method is called by the lexer, when a new comment is found
0670:                 * in the parsed stream.
0671:                 * 
0672:                 * @param htmlComment a {@link HTMLComment} element that contains all
0673:                 * the information related with the found comment.
0674:                 */
0675:                public void iHaveNewComment(HTMLComment htmlComment) {
0676:                    flushHtmlText(false);
0677:                    currentLine = htmlComment.getLine() + 1;
0678:                    currentStartPos = htmlComment.getOffset();
0679:                    handleComment(htmlComment.getText().toCharArray());
0680:                }
0681:
0682:                /**
0683:                 * This method is called by the lexer when an error is found in the
0684:                 * stream that is being parsed.
0685:                 * 
0686:                 * @param errMsgType an appropiate error message according to the found
0687:                 *               error.
0688:                 * @param attr1 the second part of the message.
0689:                 * @param attr2 the third part of the message.
0690:                 * @param attr3 the fourth part of the message.
0691:                 */
0692:                public void iHaveNewError(HTMLErrorType errMsgType,
0693:                        String attr1, String attr2, String attr3) {
0694:                    error(errMsgType, attr1 == null ? HTMLErrorType.DEF_ERROR
0695:                            .toString() : attr1,
0696:                            attr2 == null ? HTMLErrorType.DEF_ERROR.toString()
0697:                                    : attr2,
0698:                            attr3 == null ? HTMLErrorType.DEF_ERROR.toString()
0699:                                    : attr3);
0700:                }
0701:
0702:                /**
0703:                 * It reports the tags that remains open after the end of a document was
0704:                 * reached. This is equivalent to think that some tags remains in the
0705:                 * parsing stack. 
0706:                 */
0707:                public void reportRemainingElements() {
0708:                    if (currentElemNode != null) {
0709:                        flushHtmlText(getNodeTagElement(currentElemNode)
0710:                                .breaksFlow());
0711:                    }
0712:                    currentStartPos = currentEndPos + 1; // same as RI
0713:                    while (currentElemNode != null) {
0714:                        TagElement tag = (TagElement) currentElemNode
0715:                                .getUserObject();
0716:                        reportImpliedEndTag(tag.getElement());
0717:                        levelUp();
0718:                    }
0719:                }
0720:
0721:                /**
0722:                 * Reports which has been the line terminator that most appear in the
0723:                 * parsed stream.
0724:                 * <BR>
0725:                 * The String returned can be any of the following ones:
0726:                 * <ol>
0727:                 * <li> "\n" (Linux)
0728:                 * <li> "\r\n" (Windows)
0729:                 * <li> "\r" (Mac)
0730:                 * </ol>
0731:                 */
0732:                public String getEOLString() {
0733:                    return lexer.getEOLString();
0734:                }
0735:
0736:                /*
0737:                 * ********************************************************************
0738:                 * Auxiliar methods
0739:                 * ********************************************************************
0740:                 */
0741:
0742:                private boolean breaksFlowBefore() {
0743:                    return lastElemSeen == null ? false : getNodeTag(
0744:                            lastElemSeen).breaksFlow();
0745:                }
0746:
0747:                private void flushHtmlText(boolean breaksFlowAfter) {
0748:                    if (htmlText2flush != null) {
0749:                        boolean breaksFlowBefore = breaksFlowBefore();
0750:                        char[] s = getText(htmlText2flush, breaksFlowBefore,
0751:                                breaksFlowAfter);
0752:                        if (s.length != 0) {
0753:                            Tag tag = getNodeTag(currentElemNode);
0754:                            if (tag != null && tag.equals(Tag.TITLE)) {
0755:                                handleTitle(s);
0756:                            }
0757:                            manageStartElement(dtd.pcdata, htmlText2flush
0758:                                    .getOffset());
0759:                            currentStartPos = htmlText2flush.getOffset();
0760:                            handleText(s);
0761:                        }
0762:                        htmlText2flush = null;
0763:                    }
0764:                }
0765:
0766:                // FIXME review strict mode
0767:                private char[] getText(HTMLText htmlText,
0768:                        boolean breaksFlowBefore, boolean breaksFlowAfter) {
0769:                    String str = htmlText.getText();
0770:
0771:                    if (htmlText.hasLeadingSpaces() && !trailingSpaceAppended
0772:                            && !breaksFlowBefore) {
0773:                        str = " " + str;
0774:                    }
0775:                    if (htmlText.hasTrailingSpaces() && !breaksFlowAfter) {
0776:                        str += " ";
0777:                        trailingSpaceAppended = true;
0778:                    } else {
0779:                        trailingSpaceAppended = false;
0780:                    }
0781:                    return str.toCharArray();
0782:                }
0783:
0784:                private HTMLTagType getType(HTMLTag tag, Element element) {
0785:                    HTMLTagType tagType;
0786:                    if (element.isEmpty()) {
0787:                        // in the case we found a lexical start tag but is defined
0788:                        // in the DTD as SIMPLE (17)
0789:                        tagType = HTMLTagType.SIMPLE;
0790:                    } else {
0791:                        tagType = tag.getType();
0792:                    }
0793:                    return tagType;
0794:                }
0795:
0796:                /*
0797:                 * If the element is not defined in the dtd (unrecognized) the RI
0798:                 * reports a SimpleTag
0799:                 */
0800:                private void handleUnrecognizedError(HTMLTag tag) {
0801:                    String tagName = tag.getName();
0802:                    int actualPos = currentStartPos;
0803:                    if (tag.getType() == HTMLTagType.END) {
0804:                        currentStartPos = tag.getEndPos() + 2; // to match RI
0805:                        error(HTMLErrorType.ERR_END_UNRECOGNIZED, tagName);
0806:                    } else {
0807:                        if (tag.getAttributes().isEmpty()) {
0808:                            currentStartPos = tag.getEndPos() + 1; // to match RI
0809:                        }
0810:                        error(HTMLErrorType.ERR_TAG_UNRECOGNIZED, tagName);
0811:                    }
0812:                    currentStartPos = actualPos;
0813:                }
0814:
0815:                private void handleUnsupportedJavaScript(Element element) {
0816:                    if (element.isScript()) {
0817:                        error(HTMLErrorType.ERR_JAVASCRIPT_UNSUPPORTED);
0818:                    }
0819:                }
0820:
0821:                private void handleTagAttr(HTMLTag htmlTag, Element element) {
0822:                    String tagName = htmlTag.getName();
0823:                    HTMLAttributeList attList = htmlTag.getHtmlAttributeList();
0824:                    while (attList != null) {
0825:                        String currentAttListName = attList.getAttributeName();
0826:
0827:                        /* Assign the attribute name (attr): object */
0828:                        Object attr = HTML.getAttributeKey(currentAttListName);
0829:                        if (attr == null) {
0830:                            attr = currentAttListName;
0831:                        }
0832:
0833:                        /* Report ERR_INVALID_TAG_ATTR */
0834:                        AttributeList currentAttList = element
0835:                                .getAttribute(currentAttListName);
0836:                        if (currentAttList == null) {
0837:                            // if the tag is not defined in the current element => invalid
0838:                            error(HTMLErrorType.ERR_INVALID_TAG_ATTR,
0839:                                    currentAttListName, tagName);
0840:                        }
0841:
0842:                        /*
0843:                         * Sets the attribute value (attrValue)
0844:                         * 
0845:                         * If the value is null then the value #DEFAULT is assigned.
0846:                         * If it's a valid one, the case of the value is respected. Otherwise, 
0847:                         * the value is moved to lower case  (same as RI)
0848:                         */
0849:
0850:                        String attrValue;
0851:                        if (attList.getAttributeValue() == null) {
0852:                            attrValue = currentAttList != null
0853:                                    && currentAttList.getValue() != null ? currentAttList
0854:                                    .getValue()
0855:                                    : HTMLTag.DEF_ATTR_VAL;
0856:                        } else {
0857:                            // FIXME: This seems to be a special case of RI.
0858:                            if (currentAttListName.equalsIgnoreCase(CLASS_ATTR)) {
0859:                                attrValue = attList.getAttributeValue()
0860:                                        .toString().toLowerCase();
0861:                            } else if (currentAttList != null) {
0862:                                attrValue = normalizeAttrVal(attList
0863:                                        .getAttributeValue().toString(),
0864:                                        currentAttList.getType());
0865:                            } else {
0866:                                attrValue = attList.getAttributeValue()
0867:                                        .toString();
0868:                            }
0869:                        }
0870:
0871:                        /* Reports ERR_MULTI_TAG_ATTR */
0872:                        if (attributes.isDefined(currentAttListName)) {
0873:                            error(HTMLErrorType.ERR_MULTI_TAG_ATTR,
0874:                                    currentAttListName, tagName);
0875:                        }
0876:                        // Overrides the value of an attribute if it was defined previously (same as RI)
0877:                        attributes.addAttribute(attr, attrValue);
0878:
0879:                        attList = attList.getNext();
0880:                    }
0881:
0882:                    for (Object attrName : element.getRequiredAttributes()) {
0883:                        if (!attributes.isDefined(attrName)) {
0884:                            error(HTMLErrorType.ERR_REQ_ATT, attrName
0885:                                    .toString(), tagName);
0886:                        }
0887:                    }
0888:
0889:                    currentStartPos = htmlTag.getOffset();
0890:                }
0891:
0892:                private String normalizeAttrVal(String attrVal, int type) {
0893:                    String str = attrVal;
0894:                    switch (type) {
0895:                    case DTDConstants.NMTOKEN:
0896:                    case DTDConstants.ID:
0897:                    case DTDConstants.NUTOKEN:
0898:                    case DTDConstants.NUMBER:
0899:                        str = str.toLowerCase();
0900:                    }
0901:
0902:                    return str;
0903:                }
0904:
0905:                /*
0906:                 * ********************************************************************
0907:                 * Implication methods
0908:                 * ********************************************************************
0909:                 */
0910:
0911:                /**
0912:                 * Makes the report of a {@link TagElement} as implied. Setting its
0913:                 * attributes and prompting an appropiated error message.
0914:                 * 
0915:                 * @param tag the {@link TagElement} been implied.
0916:                 */
0917:                private void reportImpliedTag(TagElement tag) {
0918:                    SimpleAttributeSet backup = attributes;
0919:                    attributes = new SimpleAttributeSet();
0920:                    attributes.addAttribute("_implied_", Boolean.TRUE);
0921:                    handleStartTag(tag);
0922:                    if (!tag.getElement().omitStart()) {
0923:                        error(HTMLErrorType.ERR_START_MISSING, tag.getElement()
0924:                                .getName());
0925:                    }
0926:                    attributes = backup;
0927:                }
0928:
0929:                /**
0930:                 * It makes the report of an {@link Element} that was implied.
0931:                 * 
0932:                 * @param elem the {@link Element} to be reported.
0933:                 */
0934:                private void reportImpliedEndTag(Element elem) {
0935:                    if (!elem.isEmpty()) {
0936:                        if (!elem.omitEnd()) {
0937:                            error(HTMLErrorType.ERR_END_MISSING, elem.getName());
0938:                        }
0939:                        handleEndTag(new TagElement(elem));
0940:                    }
0941:                }
0942:
0943:                /**
0944:                 * Adds a {@link TagElement} at the "top" of the parsing stack. This means
0945:                 * that it converts it into the new <code>currentElem</code>
0946:                 * 
0947:                 * @param elem the {@link Element} to be added at the top of the stack.
0948:                 *             If the {@link Element} is pcdata, then it is not added to
0949:                 *             the parsing stack.
0950:                 */
0951:                private void addAsCurrentElem(Element elem) {
0952:                    if (!elem.equals(dtd.pcdata)) {
0953:                        DefaultMutableTreeNode newNode = new DefaultMutableTreeNode(
0954:                                new TagElement(elem));
0955:                        if (elem.isScript()) {
0956:                            scriptDepth++;
0957:                        }
0958:                        if (currentElemNode != null) {
0959:                            currentElemNode.add(newNode);
0960:                        }
0961:                        currentElemNode = newNode;
0962:                        lastElemSeen = currentElemNode;
0963:                    }
0964:                }
0965:
0966:                /**
0967:                 * Sets the currentElem to the father of the actual currentElem. 
0968:                 *
0969:                 */
0970:                private void levelUp() {
0971:                    lastElemSeen = currentElemNode;
0972:                    if (currentElemNode != null
0973:                            && getNodeElement(currentElemNode).isScript()) {
0974:                        scriptDepth--;
0975:                    }
0976:                    currentElemNode = (DefaultMutableTreeNode) currentElemNode
0977:                            .getParent();
0978:                }
0979:
0980:                /**
0981:                 * Gets the {@link Element} stored in a {@link DefaultMutableTreeNode}
0982:                 * 
0983:                 * @param node the {@link DefaultMutableTreeNode} that is consulted.
0984:                 * @return the {@link Element} that is stored in the node, if node
0985:                 *             is not null. If node in null, then null is returned.
0986:                 */
0987:                private Element getNodeElement(DefaultMutableTreeNode node) {
0988:                    TagElement tag = getNodeTagElement(node);
0989:                    Element e = null;
0990:                    if (tag != null) {
0991:                        e = tag.getElement();
0992:                    }
0993:                    return e;
0994:                }
0995:
0996:                /**
0997:                 * Gets the {@link TagElement} stored in a
0998:                 * {@link DefaultMutableTreeNode}
0999:                 * 
1000:                 * @param node the {@link DefaultMutableTreeNode} that is consulted.
1001:                 * @return the {@link TagElement} that is stored in the node, if node
1002:                 *             is not null. If node in null, then null is returned.
1003:                 */
1004:                private TagElement getNodeTagElement(DefaultMutableTreeNode node) {
1005:                    return node == null ? null : (TagElement) node
1006:                            .getUserObject();
1007:                }
1008:
1009:                /**
1010:                 * Gets the {@link Tag} stored in a {@link DefaultMutableTreeNode}
1011:                 * 
1012:                 * @param node the {@link DefaultMutableTreeNode} that is consulted.
1013:                 * @return the {@link Tag} that is stored in the node, if node
1014:                 *             is not null. If node in null, then null is returned.
1015:                 */
1016:                private Tag getNodeTag(DefaultMutableTreeNode node) {
1017:                    TagElement tag = getNodeTagElement(node);
1018:                    Tag t = null;
1019:                    if (tag != null) {
1020:                        t = tag.getHTMLTag();
1021:                    }
1022:                    return t;
1023:                }
1024:
1025:                /**
1026:                 * 
1027:                 * @param e1 The element to search in e2's inclusions
1028:                 * @param e2 The element whose inclusions will be used to search for e1
1029:                 * @return true if e1 is present in e2's inclusions
1030:                 */
1031:                private boolean isIncluded(Element e1, Element e2) {
1032:                    boolean isIncluded = false;
1033:                    if (e1 != null && e2 != null) {
1034:                        BitSet bs = e2.inclusions;
1035:                        if (bs != null && 0 <= e1.getIndex()) {
1036:                            isIncluded = e2.inclusions.get(e1.getIndex());
1037:                        }
1038:                    }
1039:                    return isIncluded;
1040:                }
1041:
1042:                /**
1043:                 * 
1044:                 * @param e1 The element to search in e2's exclusions
1045:                 * @param e2 The element whose exclusions will be used to search for e1
1046:                 * @return true if e1 is present in e2's exclusions
1047:                 */
1048:                private boolean isExcluded(Element e1, Element e2) {
1049:                    boolean isExcluded = false;
1050:                    if (e1 != null && e2 != null) {
1051:                        BitSet bs = e2.exclusions;
1052:
1053:                        if (bs != null && 0 <= e1.getIndex()) {
1054:                            isExcluded = e2.exclusions.get(e1.getIndex());
1055:                        }
1056:                    }
1057:                    return isExcluded;
1058:                }
1059:
1060:                /**
1061:                 * Takes the neccesary steps to impply the required elements when in non
1062:                 * strict mode parsing.
1063:                 * 
1064:                 * @param e
1065:                 *            the {@link Element} for which implication is required.
1066:                 * @param endPos
1067:                 *            the position of the last character that conforms the
1068:                 *            actaul parsing element in the file.
1069:                 * @return true if the parsed element must be reported as an opening
1070:                 *         tag. Otherwise it returns false.
1071:                 */
1072:                private boolean nonStrictModeStartImplication(Element e,
1073:                        int endPos) {
1074:                    boolean mustBeTreated = true;
1075:                    int actualPos = currentStartPos;
1076:                    Element actualElem = getNodeElement(currentElemNode);
1077:                    List<Pair<Element, Boolean>> impliedElements;
1078:                    boolean implicationMade;
1079:
1080:                    // The element is in the exclusions list.
1081:                    if (isExcluded(e, actualElem)) {
1082:                        // We report the close of its father.
1083:                        reportImpliedEndTag(actualElem);
1084:                        levelUp();
1085:                        manageStartElement(e, endPos);
1086:                    } else if (isIncluded(e, actualElem)) {
1087:                        addAsCurrentElem(e);
1088:                    } else if (actualElem != null
1089:                            && actualElem.getContent() == null) {
1090:                        // The content model of the current element is null
1091:                        if (dtd.isRead() && !e.equals(dtd.pcdata)) {
1092:                            error(HTMLErrorType.ERR_TAG_UNEXPECTED, e.getName());
1093:                        }
1094:                        addAsCurrentElem(e);
1095:                    } else {
1096:                        List<Element> parsed = loadParsedElems(currentElemNode);
1097:                        Pair<List<Pair<Element, Boolean>>, Boolean> impliedInfo;
1098:                        impliedInfo = imply(actualElem, e, false, parsed);
1099:                        impliedElements = impliedInfo.getFirst();
1100:                        implicationMade = !impliedElements.isEmpty();
1101:                        updateParsingStack(impliedElements);
1102:
1103:                        actualElem = getNodeElement(currentElemNode);
1104:                        if (Boolean.FALSE.equals(impliedInfo.getSecond())) {
1105:                            if (e.omitStart()) {
1106:                                error(HTMLErrorType.ERR_TAG_IGNORE, e.getName());
1107:                                mustBeTreated = false;
1108:                            } else if (currentElemNode != null
1109:                                    && actualElem != null
1110:                                    && !actualElem.equals(dtd.body)
1111:                                    && actualElem.omitEnd()) {
1112:                                handleEndTag(new TagElement(actualElem, true));
1113:                                levelUp();
1114:                                nonStrictModeStartImplication(e, endPos);
1115:                                mustBeTreated = false;
1116:                            } else if (!e.equals(dtd.pcdata) && isDefined(e)) {
1117:                                currentStartPos = endPos;
1118:                                error(HTMLErrorType.ERR_TAG_UNEXPECTED, e
1119:                                        .getName());
1120:                                currentStartPos = actualPos;
1121:                            }
1122:                        }
1123:
1124:                        if (implicationMade && e.equals(dtd.pcdata)) {
1125:                            currentStartPos++; // to match RI
1126:                            error(HTMLErrorType.ERR_START_MISSING,
1127:                                    getNodeElement(currentElemNode).getName());
1128:                        }
1129:                        if (mustBeTreated) {
1130:                            addAsCurrentElem(e);
1131:                        }
1132:
1133:                    }
1134:                    return mustBeTreated;
1135:                }
1136:
1137:                /**
1138:                 * Takes the neccesary steps to impply the required elements when in 
1139:                 * strict mode parsing.
1140:                 *  
1141:                 * @param e
1142:                 *            the {@link Element} for which implication is required.
1143:                 * @return true if the parsed element must be reported as an opening
1144:                 *         tag. Otherwise it returns false.
1145:                 */
1146:                private boolean strictModeStartImplication(Element e) {
1147:                    Element ce = getNodeElement(currentElemNode);
1148:                    Pair<List<Pair<Element, Boolean>>, Boolean> implied;
1149:                    List<Element> parsed = loadParsedElems(currentElemNode);
1150:
1151:                    implied = imply(getNodeElement(currentElemNode), e, true,
1152:                            parsed);
1153:
1154:                    updateParsingStack(implied.getFirst());
1155:
1156:                    if (!e.equals(dtd.pcdata)
1157:                            && (isExcluded(e, ce) || implied.getSecond()
1158:                                    .equals(Boolean.FALSE))) {
1159:                        error(HTMLErrorType.ERR_TAG_UNEXPECTED, e.getName());
1160:                    }
1161:                    addAsCurrentElem(e);
1162:
1163:                    return true;
1164:                }
1165:
1166:                /**
1167:                 * Loads the already been parsed elements for a specific node in the
1168:                 * parsing tree into a {@link List}
1169:                 * 
1170:                 * @param node the node of the parsing tree that stores the
1171:                 *             {@link TagElement} element for which the already been
1172:                 *             parsed sub-elements want to be consulted.
1173:                 * 
1174:                 * @return a {@link List} with all the elements that has already been
1175:                 *         parsed for the element that is actually on the top of the
1176:                 *         parsing stack.
1177:                 */
1178:                private List<Element> loadParsedElems(
1179:                        DefaultMutableTreeNode node) {
1180:                    List<Element> parsed = new LinkedList<Element>();
1181:                    if (node != null) {
1182:                        Enumeration itr = node.children();
1183:                        while (itr.hasMoreElements()) {
1184:                            parsed
1185:                                    .add(getNodeElement((DefaultMutableTreeNode) itr
1186:                                            .nextElement()));
1187:                        }
1188:                    }
1189:                    return parsed;
1190:                }
1191:
1192:                /**
1193:                 * It makes the managent of implication when an opening tag is found.
1194:                 * 
1195:                 * @param e
1196:                 *            the {@link Element} that has been found in the document.
1197:                 * @param endPos
1198:                 *            the final position of the opening tag in the parsed file.
1199:                 * @return true if the <code>handleStartTag</code> method must be
1200:                 *         called over the current tag. Otherwise it returns false.
1201:                 */
1202:                private boolean manageStartElement(Element e, int endPos) {
1203:
1204:                    boolean mustBeReported;
1205:
1206:                    if (strict) {
1207:                        strictModeStartImplication(e);
1208:                        mustBeReported = true;
1209:                    } else {
1210:                        mustBeReported = nonStrictModeStartImplication(e,
1211:                                endPos);
1212:                    }
1213:
1214:                    return mustBeReported;
1215:                }
1216:
1217:                /**
1218:                 * It determines whether an {@link Element} was defined in the path that
1219:                 * takes from the <code>rootElem</code> to the <code>currentElem</code> in
1220:                 * the parsed tree.
1221:                 * 
1222:                 * @param elem the {@link Element} been searched.
1223:                 * @return true if the {@link Element} could be found in the path.
1224:                 *         Otherwise, false.
1225:                 */
1226:                private boolean isInActualPath(Element elem) {
1227:                    boolean found = false;
1228:
1229:                    Object[] path = currentElemNode.getUserObjectPath();
1230:                    for (Object obj : path) {
1231:                        found = found
1232:                                || ((TagElement) obj).getElement().getName()
1233:                                        .equalsIgnoreCase(elem.getName());
1234:                    }
1235:
1236:                    return found;
1237:                }
1238:
1239:                /**
1240:                 * Determines if a {@link TagElement} contains an {@link Element} that is 
1241:                 * defined in the current {@link DTD}
1242:                 *  
1243:                 * @param elem the {@link Element} been searched.
1244:                 * @return true if the {@link Element} is defined in the current
1245:                 *         {@link DTD}. Otherwise it returns false.         
1246:                 */
1247:                private boolean isDefined(Element elem) {
1248:                    return dtd.elementHash.containsKey(elem.getName()
1249:                            .toLowerCase());
1250:                }
1251:
1252:                /**
1253:                 * It manages the implication behaviour when a closing tag is found.
1254:                 * 
1255:                 * @param e the {@link Element} that was found in the document.
1256:                 * @return true if the handleEndTag must be reported over the current tag,
1257:                 *         of false otherwise.
1258:                 */
1259:                private boolean manageEndElement(Element e) {
1260:                    boolean mustBeReported = false;
1261:                    int actualPos = currentStartPos;
1262:
1263:                    if (currentElemNode == null) {
1264:                        // FIXME corroborate this position...
1265:                        currentStartPos = currentEndPos + 1; //to match RI 
1266:                        error(HTMLErrorType.ERR_END_EXTRA_TAG, e.getName());
1267:                    } else {
1268:                        if (isInActualPath(e)) {
1269:                            Element actualElem = getNodeElement(currentElemNode);
1270:                            if (!e.equals(actualElem) && !actualElem.omitEnd()
1271:                                    && !e.omitEnd()) {
1272:                                error(HTMLErrorType.ERR_TAG_IGNORE, e.getName());
1273:                            } else {
1274:                                while (!e.getName().equalsIgnoreCase(
1275:                                        actualElem.getName())) {
1276:                                    reportImpliedEndTag(actualElem);
1277:                                    //  the endTag should only be reported if the current 
1278:                                    // Element is not simple (EMPTY)
1279:                                    levelUp();
1280:                                    actualElem = getNodeElement(currentElemNode);
1281:                                }
1282:                                mustBeReported = !e.isEmpty();
1283:                                levelUp();
1284:                            }
1285:                        } else if (isDefined(e)) {
1286:                            error(HTMLErrorType.ERR_UNMATCHED_END_TAG, e
1287:                                    .getName());
1288:                        }
1289:
1290:                    }
1291:                    currentStartPos = actualPos;
1292:                    return mustBeReported;
1293:                }
1294:
1295:                /**
1296:                 * Gets the sequence of {@link Element} that may be implied to reach the
1297:                 * {@link Element} e.
1298:                 * 
1299:                 * @param actualElem
1300:                 *            the last known {@link Element}, and the one from which the
1301:                 *            implication should take place. It also defines the first
1302:                 *            {@link ContentModel} where the {@link Element} "e" should be
1303:                 *            searched.
1304:                 * @param e
1305:                 *            the {@link Element} been searched.
1306:                 * @param isSimple
1307:                 *            defines if the implication should take in consideration only
1308:                 *            the <code>HTML</code>, <code>HEAD</code> and <code>BODY</code>
1309:                 *            elements.
1310:                 * @param parsed
1311:                 *            the sequence of previosly found {@link Element} inside the
1312:                 *            {@link Element} "actualElem", but before the ocurrence of the
1313:                 *            {@link Element} "e".
1314:                 * @return a pair that contains the sequence of elements to be impplied and
1315:                 *         a {@link Boolean} that specifies if the {@link Element} could be
1316:                 *         really be place in such place, or it just the only possible solution
1317:                 *         to the implication problem.
1318:                 */
1319:                private Pair<List<Pair<Element, Boolean>>, Boolean> imply(
1320:                        Element actualElem, Element e, boolean isSimple,
1321:                        List<Element> parsed) {
1322:
1323:                    Pair<List<Pair<Element, Boolean>>, Boolean> impliedInfo = null;
1324:                    List<Pair<Element, Boolean>> implied = null;
1325:                    LinkedList<Pair<Element, Boolean>> path = new LinkedList<Pair<Element, Boolean>>();
1326:                    int depth = 1;
1327:                    boolean searchCompleted = false;
1328:                    Boolean found = Boolean.TRUE;
1329:
1330:                    if (actualElem != null) {
1331:                        if (actualElem.getContent() == null) {
1332:                            if (e.getType() == DTDConstants.MODEL) {
1333:                                reportImpliedEndTag(actualElem);
1334:                                levelUp();
1335:                                parsed = loadParsedElems(currentElemNode);
1336:                                impliedInfo = imply(
1337:                                        getNodeElement(currentElemNode), e,
1338:                                        false, parsed);
1339:                            }
1340:                        } else {
1341:                            implied = actualElem.getContent().implication(e,
1342:                                    parsed, false, 1, path);
1343:                            if (implied == null) {
1344:                                Element father = null;
1345:                                if (currentElemNode != null) {
1346:                                    DefaultMutableTreeNode fatherNode = (DefaultMutableTreeNode) currentElemNode
1347:                                            .getParent();
1348:                                    if (fatherNode != null) {
1349:                                        father = getNodeElement(fatherNode);
1350:                                        ContentModel fatherModel = father
1351:                                                .getContent();
1352:                                        if (fatherModel != null) {
1353:                                            parsed = loadParsedElems(fatherNode);
1354:                                            implied = fatherModel
1355:                                                    .implication(
1356:                                                            e,
1357:                                                            parsed,
1358:                                                            false,
1359:                                                            1,
1360:                                                            new LinkedList<Pair<Element, Boolean>>());
1361:                                        }
1362:                                    }
1363:                                }
1364:                                parsed = loadParsedElems(currentElemNode);
1365:                                if (implied != null || isIncluded(e, father)) {
1366:                                    reportImpliedEndTag(actualElem);
1367:                                    levelUp();
1368:                                    impliedInfo = imply(
1369:                                            getNodeElement(currentElemNode), e,
1370:                                            false, parsed);
1371:                                } else {
1372:                                    while (depth <= MAX_DEPTH
1373:                                            && !searchCompleted) {
1374:                                        // implied also checks if the element is in the
1375:                                        // inclusions of a potential implication path
1376:                                        implied = actualElem.getContent()
1377:                                                .implication(e, parsed, false,
1378:                                                        depth, path);
1379:                                        depth++;
1380:                                        path.clear();
1381:                                        searchCompleted = (implied != null || isSimple);
1382:                                    }
1383:                                }
1384:                            }
1385:                            found = impliedInfo != null ? impliedInfo
1386:                                    .getSecond() : implied != null;
1387:                        }
1388:
1389:                        if (found.equals(Boolean.FALSE)) {
1390:                            if (actualElem.equals(dtd.html) && !e.omitStart()) {
1391:                                implied = implied == null ? new LinkedList<Pair<Element, Boolean>>()
1392:                                        : implied;
1393:                                implied.add(new Pair<Element, Boolean>(
1394:                                        dtd.head, Boolean.TRUE));
1395:                                implied.add(new Pair<Element, Boolean>(
1396:                                        dtd.body, Boolean.FALSE));
1397:                            } else if (actualElem.equals(dtd.head)
1398:                                    && !e.omitStart()) {
1399:                                implied = implied == null ? new LinkedList<Pair<Element, Boolean>>()
1400:                                        : implied;
1401:                                reportImpliedEndTag(dtd.head);
1402:                                levelUp();
1403:                                implied.add(new Pair<Element, Boolean>(
1404:                                        dtd.body, Boolean.FALSE));
1405:                            }
1406:                            found = Boolean.FALSE;
1407:                        }
1408:
1409:                        if (implied != null && !implied.isEmpty()) {
1410:                            Element elem = (Element) ((Pair) implied.get(0))
1411:                                    .getFirst();
1412:                            while (!implied.isEmpty() && parsed.contains(elem)) {
1413:                                implied.remove(0);
1414:                                parsed = parsed
1415:                                        .subList(parsed.indexOf(elem) + 1,
1416:                                                parsed.size());
1417:                                if (!implied.isEmpty()) {
1418:                                    elem = (Element) ((Pair) implied.get(0))
1419:                                            .getFirst();
1420:                                }
1421:                            }
1422:                        }
1423:                    } else {
1424:                        found = Boolean.valueOf(e.equals(dtd.html));
1425:                    }
1426:
1427:                    if (actualElem == null && !found) {
1428:                        TagElement html = new TagElement(dtd.html, true);
1429:                        reportImpliedTag(html);
1430:                        addAsCurrentElem(dtd.html);
1431:                        impliedInfo = imply(dtd.html, e, false, parsed);
1432:                    } else {
1433:                        if (implied == null) {
1434:                            implied = new LinkedList<Pair<Element, Boolean>>();
1435:                        }
1436:                        impliedInfo = new Pair<List<Pair<Element, Boolean>>, Boolean>(
1437:                                implied, found);
1438:                    }
1439:                    return impliedInfo;
1440:                }
1441:
1442:                private void updateParsingStack(
1443:                        List<Pair<Element, Boolean>> implied) {
1444:                    while (!implied.isEmpty()) {
1445:                        Pair<Element, Boolean> impliedPair = implied.remove(0);
1446:                        TagElement impliedTag = new TagElement(impliedPair
1447:                                .getFirst(), true);
1448:                        reportImpliedTag(impliedTag);
1449:                        Element element = impliedTag.getElement();
1450:                        addAsCurrentElem(element);
1451:                        if (impliedPair.getSecond().equals(Boolean.TRUE)
1452:                                && !element.isEmpty()) {
1453:                            handleEndTag(impliedTag);
1454:                            levelUp();
1455:                        }
1456:                    }
1457:                }
1458:            }
1459:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.