Source Code Cross Referenced for XMLScanner.java in » Graphic-Library » batik » org » apache » batik » xml » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Graphic Library » batik » org.apache.batik.xml
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:
0003:           Licensed to the Apache Software Foundation (ASF) under one or more
0004:           contributor license agreements.  See the NOTICE file distributed with
0005:           this work for additional information regarding copyright ownership.
0006:           The ASF licenses this file to You under the Apache License, Version 2.0
0007:           (the "License"); you may not use this file except in compliance with
0008:           the License.  You may obtain a copy of the License at
0009:
0010:               http://www.apache.org/licenses/LICENSE-2.0
0011:
0012:           Unless required by applicable law or agreed to in writing, software
0013:           distributed under the License is distributed on an "AS IS" BASIS,
0014:           WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0015:           See the License for the specific language governing permissions and
0016:           limitations under the License.
0017:
0018:         */
0019:        package org.apache.batik.xml;
0020:
0021:        import java.io.IOException;
0022:        import java.io.InputStream;
0023:        import java.io.Reader;
0024:        import java.util.Locale;
0025:        import java.util.MissingResourceException;
0026:
0027:        import org.apache.batik.i18n.Localizable;
0028:        import org.apache.batik.i18n.LocalizableSupport;
0029:        import org.apache.batik.util.io.NormalizingReader;
0030:        import org.apache.batik.util.io.StreamNormalizingReader;
0031:        import org.apache.batik.util.io.StringNormalizingReader;
0032:
0033:        /**
0034:         * This class represents a scanner for XML documents.
0035:         *
0036:         * @author <a href="mailto:stephane@hillion.org">Stephane Hillion</a>
0037:         * @version $Id: XMLScanner.java 489226 2006-12-21 00:05:36Z cam $
0038:         */
0039:        public class XMLScanner implements  Localizable {
0040:
0041:            /**
0042:             * The document start context.
0043:             */
0044:            public static final int DOCUMENT_START_CONTEXT = 0;
0045:
0046:            /**
0047:             * The top level context.
0048:             */
0049:            public static final int TOP_LEVEL_CONTEXT = 1;
0050:
0051:            /**
0052:             * The processing instruction context.
0053:             */
0054:            public static final int PI_CONTEXT = 2;
0055:
0056:            /**
0057:             * The XML declaration context.
0058:             */
0059:            public static final int XML_DECL_CONTEXT = 3;
0060:
0061:            /**
0062:             * The doctype context.
0063:             */
0064:            public static final int DOCTYPE_CONTEXT = 4;
0065:
0066:            /**
0067:             * The start tag context.
0068:             */
0069:            public static final int START_TAG_CONTEXT = 5;
0070:
0071:            /**
0072:             * The content context.
0073:             */
0074:            public static final int CONTENT_CONTEXT = 6;
0075:
0076:            /**
0077:             * The DTD declarations context.
0078:             */
0079:            public static final int DTD_DECLARATIONS_CONTEXT = 7;
0080:
0081:            /**
0082:             * The CDATA section context.
0083:             */
0084:            public static final int CDATA_SECTION_CONTEXT = 8;
0085:
0086:            /**
0087:             * The end tag context.
0088:             */
0089:            public static final int END_TAG_CONTEXT = 9;
0090:
0091:            /**
0092:             * The attribute value context.
0093:             */
0094:            public static final int ATTRIBUTE_VALUE_CONTEXT = 10;
0095:
0096:            /**
0097:             * The ATTLIST context.
0098:             */
0099:            public static final int ATTLIST_CONTEXT = 11;
0100:
0101:            /**
0102:             * The element declaration context.
0103:             */
0104:            public static final int ELEMENT_DECLARATION_CONTEXT = 12;
0105:
0106:            /**
0107:             * The entity context.
0108:             */
0109:            public static final int ENTITY_CONTEXT = 13;
0110:
0111:            /**
0112:             * The notation context.
0113:             */
0114:            public static final int NOTATION_CONTEXT = 14;
0115:
0116:            /**
0117:             * The notation type context.
0118:             */
0119:            public static final int NOTATION_TYPE_CONTEXT = 15;
0120:
0121:            /**
0122:             * The enumeration context.
0123:             */
0124:            public static final int ENUMERATION_CONTEXT = 16;
0125:
0126:            /**
0127:             * The entity value context.
0128:             */
0129:            public static final int ENTITY_VALUE_CONTEXT = 17;
0130:
0131:            /**
0132:             * The default resource bundle base name.
0133:             */
0134:            protected static final String BUNDLE_CLASSNAME = "org.apache.batik.xml.resources.Messages";
0135:
0136:            /**
0137:             * The localizable support.
0138:             */
0139:            protected LocalizableSupport localizableSupport = new LocalizableSupport(
0140:                    BUNDLE_CLASSNAME, XMLScanner.class.getClassLoader());
0141:
0142:            /**
0143:             * The reader.
0144:             */
0145:            protected NormalizingReader reader;
0146:
0147:            /**
0148:             * The current char.
0149:             */
0150:            protected int current;
0151:
0152:            /**
0153:             * The type of the current lexical unit.
0154:             */
0155:            protected int type;
0156:
0157:            /**
0158:             * The recording buffer.
0159:             */
0160:            protected char[] buffer = new char[1024];
0161:
0162:            /**
0163:             * The current position in the buffer.
0164:             */
0165:            protected int position;
0166:
0167:            /**
0168:             * The start offset of the last lexical unit.
0169:             */
0170:            protected int start;
0171:
0172:            /**
0173:             * The end offset of the last lexical unit.
0174:             */
0175:            protected int end;
0176:
0177:            /**
0178:             * The current scanning context.
0179:             */
0180:            protected int context;
0181:
0182:            /**
0183:             * The depth in the xml tree.
0184:             */
0185:            protected int depth;
0186:
0187:            /**
0188:             * A PI end has been previously read.
0189:             */
0190:            protected boolean piEndRead;
0191:
0192:            /**
0193:             * The scanner is in the internal DTD.
0194:             */
0195:            protected boolean inDTD;
0196:
0197:            /**
0198:             * The last attribute delimiter encountered.
0199:             */
0200:            protected char attrDelimiter;
0201:
0202:            /**
0203:             * A CDATA section end is the next token
0204:             */
0205:            protected boolean cdataEndRead;
0206:
/**
 * Builds a scanner that reads an XML document from a character stream.
 *
 * <p>The scanner starts in {@link #DOCUMENT_START_CONTEXT}, wraps the
 * given reader in a {@code StreamNormalizingReader} and immediately
 * reads the first character into {@code current}.</p>
 *
 * @param r The reader to scan.
 * @throws XMLException if an {@link IOException} occurs while wrapping
 *         the reader or reading the first character; the I/O error is
 *         handed to the exception's constructor.
 */
public XMLScanner(Reader r) throws XMLException {
    this.context = DOCUMENT_START_CONTEXT;
    try {
        this.reader = new StreamNormalizingReader(r);
        this.current = nextChar();
    } catch (IOException ioe) {
        throw new XMLException(ioe);
    }
}
0220:
/**
 * Builds a scanner that reads an XML document from a byte stream.
 *
 * <p>The scanner starts in {@link #DOCUMENT_START_CONTEXT}, wraps the
 * stream in a {@code StreamNormalizingReader} created with the given
 * encoding and immediately reads the first character into
 * {@code current}.</p>
 *
 * @param is The input stream to scan.
 * @param enc The character encoding to use.
 * @throws XMLException if an {@link IOException} occurs while wrapping
 *         the stream or reading the first character; the I/O error is
 *         handed to the exception's constructor.
 */
public XMLScanner(InputStream is, String enc) throws XMLException {
    this.context = DOCUMENT_START_CONTEXT;
    try {
        this.reader = new StreamNormalizingReader(is, enc);
        this.current = nextChar();
    } catch (IOException ioe) {
        throw new XMLException(ioe);
    }
}
0235:
/**
 * Builds a scanner that reads an XML document held in a string.
 *
 * <p>The scanner starts in {@link #DOCUMENT_START_CONTEXT}, wraps the
 * string in a {@code StringNormalizingReader} and immediately reads the
 * first character into {@code current}.</p>
 *
 * @param s The string to parse.
 * @throws XMLException if an {@link IOException} occurs while wrapping
 *         the string or reading the first character; the I/O error is
 *         handed to the exception's constructor.
 */
public XMLScanner(String s) throws XMLException {
    this.context = DOCUMENT_START_CONTEXT;
    try {
        this.reader = new StringNormalizingReader(s);
        this.current = nextChar();
    } catch (IOException ioe) {
        throw new XMLException(ioe);
    }
}
0249:
0250:            /**
0251:             * Implements {@link org.apache.batik.i18n.Localizable#setLocale(Locale)}.
0252:             */
0253:            public void setLocale(Locale l) {
0254:                localizableSupport.setLocale(l);
0255:            }
0256:
0257:            /**
0258:             * Implements {@link org.apache.batik.i18n.Localizable#getLocale()}.
0259:             */
0260:            public Locale getLocale() {
0261:                return localizableSupport.getLocale();
0262:            }
0263:
0264:            /**
0265:             * Implements {@link
0266:             * org.apache.batik.i18n.Localizable#formatMessage(String,Object[])}.
0267:             */
0268:            public String formatMessage(String key, Object[] args)
0269:                    throws MissingResourceException {
0270:                return localizableSupport.formatMessage(key, args);
0271:            }
0272:
0273:            /**
0274:             * Sets the current depth in the XML tree.
0275:             */
0276:            public void setDepth(int i) {
0277:                depth = i;
0278:            }
0279:
0280:            /**
0281:             * Returns the current depth in the XML tree.
0282:             */
0283:            public int getDepth() {
0284:                return depth;
0285:            }
0286:
0287:            /**
0288:             * Sets the current context.
0289:             */
0290:            public void setContext(int c) {
0291:                context = c;
0292:            }
0293:
0294:            /**
0295:             * Returns the current context.
0296:             */
0297:            public int getContext() {
0298:                return context;
0299:            }
0300:
0301:            /**
0302:             * The current lexical unit type like defined in LexicalUnits.
0303:             */
0304:            public int getType() {
0305:                return type;
0306:            }
0307:
0308:            /**
0309:             * Returns the current line.
0310:             */
0311:            public int getLine() {
0312:                return reader.getLine();
0313:            }
0314:
0315:            /**
0316:             * Returns the current column.
0317:             */
0318:            public int getColumn() {
0319:                return reader.getColumn();
0320:            }
0321:
0322:            /**
0323:             * Returns the buffer used to store the chars.
0324:             */
0325:            public char[] getBuffer() {
0326:                return buffer;
0327:            }
0328:
0329:            /**
0330:             * Returns the start offset of the last lexical unit.
0331:             */
0332:            public int getStart() {
0333:                return start;
0334:            }
0335:
0336:            /**
0337:             * Returns the end offset of the last lexical unit.
0338:             */
0339:            public int getEnd() {
0340:                return end;
0341:            }
0342:
0343:            /**
0344:             * Returns the last encountered string delimiter.
0345:             */
0346:            public char getStringDelimiter() {
0347:                return attrDelimiter;
0348:            }
0349:
0350:            /**
0351:             * Returns the start offset of the current lexical unit.
0352:             */
0353:            public int getStartOffset() {
0354:                switch (type) {
0355:                case LexicalUnits.SECTION_END:
0356:                    return -3;
0357:
0358:                case LexicalUnits.PI_END:
0359:                    return -2;
0360:
0361:                case LexicalUnits.STRING:
0362:                case LexicalUnits.ENTITY_REFERENCE:
0363:                case LexicalUnits.PARAMETER_ENTITY_REFERENCE:
0364:                case LexicalUnits.START_TAG:
0365:                case LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT:
0366:                    return 1;
0367:
0368:                case LexicalUnits.PI_START:
0369:                case LexicalUnits.END_TAG:
0370:                case LexicalUnits.CHARACTER_REFERENCE:
0371:                    return 2;
0372:
0373:                case LexicalUnits.COMMENT:
0374:                    return 4;
0375:
0376:                default:
0377:                    return 0;
0378:                }
0379:            }
0380:
0381:            /**
0382:             * Returns the end offset of the current lexical unit.
0383:             */
0384:            public int getEndOffset() {
0385:                switch (type) {
0386:                case LexicalUnits.STRING:
0387:                case LexicalUnits.ENTITY_REFERENCE:
0388:                case LexicalUnits.CHARACTER_REFERENCE:
0389:                case LexicalUnits.PARAMETER_ENTITY_REFERENCE:
0390:                case LexicalUnits.LAST_ATTRIBUTE_FRAGMENT:
0391:                    return -1;
0392:
0393:                case LexicalUnits.PI_DATA:
0394:                    return -2;
0395:
0396:                case LexicalUnits.COMMENT:
0397:                    return -3;
0398:
0399:                case LexicalUnits.CHARACTER_DATA:
0400:                    if (cdataEndRead) {
0401:                        return -3;
0402:                    }
0403:                    return 0;
0404:
0405:                default:
0406:                    return 0;
0407:                }
0408:            }
0409:
0410:            /**
0411:             * Clears the buffer.
0412:             */
0413:            public void clearBuffer() {
0414:                if (position <= 0) {
0415:                    position = 0;
0416:                } else {
0417:                    buffer[0] = buffer[position - 1];
0418:                    position = 1;
0419:                }
0420:            }
0421:
0422:            /**
0423:             * Advances to the next lexical unit.
0424:             * @return The type of the lexical unit like defined in LexicalUnits.
0425:             */
0426:            public int next() throws XMLException {
0427:                return next(context);
0428:            }
0429:
0430:            /**
0431:             * Advances to the next lexical unit.
0432:             * @param ctx The context to use for scanning.
0433:             * @return The type of the lexical unit like defined in LexicalUnits.
0434:             */
0435:            public int next(int ctx) throws XMLException {
0436:                start = position - 1;
0437:                try {
0438:                    switch (ctx) {
0439:                    case DOCUMENT_START_CONTEXT:
0440:                        type = nextInDocumentStart();
0441:                        break;
0442:
0443:                    case TOP_LEVEL_CONTEXT:
0444:                        type = nextInTopLevel();
0445:                        break;
0446:
0447:                    case PI_CONTEXT:
0448:                        type = nextInPI();
0449:                        break;
0450:
0451:                    case START_TAG_CONTEXT:
0452:                        type = nextInStartTag();
0453:                        break;
0454:
0455:                    case ATTRIBUTE_VALUE_CONTEXT:
0456:                        type = nextInAttributeValue();
0457:                        break;
0458:
0459:                    case CONTENT_CONTEXT:
0460:                        type = nextInContent();
0461:                        break;
0462:
0463:                    case END_TAG_CONTEXT:
0464:                        type = nextInEndTag();
0465:                        break;
0466:
0467:                    case CDATA_SECTION_CONTEXT:
0468:                        type = nextInCDATASection();
0469:                        break;
0470:
0471:                    case XML_DECL_CONTEXT:
0472:                        type = nextInXMLDecl();
0473:                        break;
0474:
0475:                    case DOCTYPE_CONTEXT:
0476:                        type = nextInDoctype();
0477:                        break;
0478:
0479:                    case DTD_DECLARATIONS_CONTEXT:
0480:                        type = nextInDTDDeclarations();
0481:                        break;
0482:
0483:                    case ELEMENT_DECLARATION_CONTEXT:
0484:                        type = nextInElementDeclaration();
0485:                        break;
0486:
0487:                    case ATTLIST_CONTEXT:
0488:                        type = nextInAttList();
0489:                        break;
0490:
0491:                    case NOTATION_CONTEXT:
0492:                        type = nextInNotation();
0493:                        break;
0494:
0495:                    case ENTITY_CONTEXT:
0496:                        type = nextInEntity();
0497:                        break;
0498:
0499:                    case ENTITY_VALUE_CONTEXT:
0500:                        return nextInEntityValue();
0501:
0502:                    case NOTATION_TYPE_CONTEXT:
0503:                        return nextInNotationType();
0504:
0505:                    case ENUMERATION_CONTEXT:
0506:                        return nextInEnumeration();
0507:
0508:                    default:
0509:                        throw new IllegalArgumentException("unexpected ctx:"
0510:                                + ctx);
0511:                    }
0512:                } catch (IOException e) {
0513:                    throw new XMLException(e);
0514:                }
0515:                end = position - ((current == -1) ? 0 : 1);
0516:                return type;
0517:            }
0518:
0519:            /**
0520:             * Reads the first token in the stream.
0521:             */
0522:            protected int nextInDocumentStart() throws IOException,
0523:                    XMLException {
0524:                switch (current) {
0525:                case 0x9:
0526:                case 0xA:
0527:                case 0xD:
0528:                case 0x20:
0529:                    do {
0530:                        nextChar();
0531:                    } while (current != -1
0532:                            && XMLUtilities.isXMLSpace((char) current));
0533:                    context = (depth == 0) ? TOP_LEVEL_CONTEXT
0534:                            : CONTENT_CONTEXT;
0535:                    return LexicalUnits.S;
0536:
0537:                case '<':
0538:                    switch (nextChar()) {
0539:                    case '?':
0540:                        int c1 = nextChar();
0541:                        if (c1 == -1
0542:                                || !XMLUtilities
0543:                                        .isXMLNameFirstCharacter((char) c1)) {
0544:                            throw createXMLException("invalid.pi.target");
0545:                        }
0546:                        context = PI_CONTEXT;
0547:                        int c2 = nextChar();
0548:                        if (c2 == -1
0549:                                || !XMLUtilities.isXMLNameCharacter((char) c2)) {
0550:                            return LexicalUnits.PI_START;
0551:                        }
0552:                        int c3 = nextChar();
0553:                        if (c3 == -1
0554:                                || !XMLUtilities.isXMLNameCharacter((char) c3)) {
0555:                            return LexicalUnits.PI_START;
0556:                        }
0557:                        int c4 = nextChar();
0558:                        if (c4 != -1
0559:                                && XMLUtilities.isXMLNameCharacter((char) c4)) {
0560:                            do {
0561:                                nextChar();
0562:                            } while (current != -1
0563:                                    && XMLUtilities
0564:                                            .isXMLNameCharacter((char) current));
0565:                            return LexicalUnits.PI_START;
0566:                        }
0567:                        if (c1 == 'x' && c2 == 'm' && c3 == 'l') {
0568:                            context = XML_DECL_CONTEXT;
0569:                            return LexicalUnits.XML_DECL_START;
0570:                        }
0571:                        if ((c1 == 'x' || c1 == 'X')
0572:                                && (c2 == 'm' || c2 == 'M')
0573:                                && (c3 == 'l' || c3 == 'L')) {
0574:                            throw createXMLException("xml.reserved");
0575:                        }
0576:                        return LexicalUnits.PI_START;
0577:
0578:                    case '!':
0579:                        switch (nextChar()) {
0580:                        case '-':
0581:                            return readComment();
0582:
0583:                        case 'D':
0584:                            context = DOCTYPE_CONTEXT;
0585:                            return readIdentifier("OCTYPE",
0586:                                    LexicalUnits.DOCTYPE_START, -1);
0587:
0588:                        default:
0589:                            throw createXMLException("invalid.doctype");
0590:                        }
0591:
0592:                    default:
0593:                        context = START_TAG_CONTEXT;
0594:                        depth++;
0595:                        return readName(LexicalUnits.START_TAG);
0596:                    }
0597:
0598:                case -1:
0599:                    return LexicalUnits.EOF;
0600:
0601:                default:
0602:                    if (depth == 0) {
0603:                        throw createXMLException("invalid.character");
0604:                    } else {
0605:                        return nextInContent();
0606:                    }
0607:                }
0608:            }
0609:
0610:            /**
0611:             * Advances to the next lexical unit in the top level context.
0612:             * @return The type of the lexical unit like defined in LexicalUnits.
0613:             */
0614:            protected int nextInTopLevel() throws IOException, XMLException {
0615:                switch (current) {
0616:                case 0x9:
0617:                case 0xA:
0618:                case 0xD:
0619:                case 0x20:
0620:                    do {
0621:                        nextChar();
0622:                    } while (current != -1
0623:                            && XMLUtilities.isXMLSpace((char) current));
0624:                    return LexicalUnits.S;
0625:
0626:                case '<':
0627:                    switch (nextChar()) {
0628:                    case '?':
0629:                        context = PI_CONTEXT;
0630:                        return readPIStart();
0631:
0632:                    case '!':
0633:                        switch (nextChar()) {
0634:                        case '-':
0635:                            return readComment();
0636:
0637:                        case 'D':
0638:                            context = DOCTYPE_CONTEXT;
0639:                            return readIdentifier("OCTYPE",
0640:                                    LexicalUnits.DOCTYPE_START, -1);
0641:
0642:                        default:
0643:                            throw createXMLException("invalid.character");
0644:                        }
0645:                    default:
0646:                        context = START_TAG_CONTEXT;
0647:                        depth++;
0648:                        return readName(LexicalUnits.START_TAG);
0649:                    }
0650:
0651:                case -1:
0652:                    return LexicalUnits.EOF;
0653:
0654:                default:
0655:                    throw createXMLException("invalid.character");
0656:                }
0657:            }
0658:
0659:            /**
0660:             * Returns the next lexical unit in the context of a processing
0661:             * instruction.
0662:             */
0663:            protected int nextInPI() throws IOException, XMLException {
0664:                if (piEndRead) {
0665:                    piEndRead = false;
0666:                    context = (depth == 0) ? TOP_LEVEL_CONTEXT
0667:                            : CONTENT_CONTEXT;
0668:                    return LexicalUnits.PI_END;
0669:                }
0670:
0671:                switch (current) {
0672:                case 0x9:
0673:                case 0xA:
0674:                case 0xD:
0675:                case 0x20:
0676:                    do {
0677:                        nextChar();
0678:                    } while (current != -1
0679:                            && XMLUtilities.isXMLSpace((char) current));
0680:                    return LexicalUnits.S;
0681:                case '?':
0682:                    if (nextChar() != '>') {
0683:                        throw createXMLException("pi.end.expected");
0684:                    }
0685:                    nextChar();
0686:                    if (inDTD) {
0687:                        context = DTD_DECLARATIONS_CONTEXT;
0688:                    } else if (depth == 0) {
0689:                        context = TOP_LEVEL_CONTEXT;
0690:                    } else {
0691:                        context = CONTENT_CONTEXT;
0692:                    }
0693:                    return LexicalUnits.PI_END;
0694:
0695:                default:
0696:                    do {
0697:                        do {
0698:                            nextChar();
0699:                        } while (current != -1 && current != '?');
0700:                        nextChar();
0701:                    } while (current != -1 && current != '>');
0702:                    nextChar();
0703:                    piEndRead = true;
0704:                    return LexicalUnits.PI_DATA;
0705:                }
0706:            }
0707:
0708:            /**
0709:             * Returns the next lexical unit in the context of a start tag.
0710:             */
0711:            protected int nextInStartTag() throws IOException, XMLException {
0712:                switch (current) {
0713:                case 0x9:
0714:                case 0xA:
0715:                case 0xD:
0716:                case 0x20:
0717:                    do {
0718:                        nextChar();
0719:                    } while (current != -1
0720:                            && XMLUtilities.isXMLSpace((char) current));
0721:                    return LexicalUnits.S;
0722:
0723:                case '/':
0724:                    if (nextChar() != '>') {
0725:                        throw createXMLException("malformed.tag.end");
0726:                    }
0727:                    nextChar();
0728:                    context = (--depth == 0) ? TOP_LEVEL_CONTEXT
0729:                            : CONTENT_CONTEXT;
0730:                    return LexicalUnits.EMPTY_ELEMENT_END;
0731:
0732:                case '>':
0733:                    nextChar();
0734:                    context = CONTENT_CONTEXT;
0735:                    return LexicalUnits.END_CHAR;
0736:
0737:                case '=':
0738:                    nextChar();
0739:                    return LexicalUnits.EQ;
0740:
0741:                case '"':
0742:                    attrDelimiter = '"';
0743:                    nextChar();
0744:
0745:                    for (;;) {
0746:                        switch (current) {
0747:                        case '"':
0748:                            nextChar();
0749:                            return LexicalUnits.STRING;
0750:
0751:                        case '&':
0752:                            context = ATTRIBUTE_VALUE_CONTEXT;
0753:                            return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
0754:
0755:                        case '<':
0756:                            throw createXMLException("invalid.character");
0757:
0758:                        case -1:
0759:                            throw createXMLException("unexpected.eof");
0760:                        }
0761:                        nextChar();
0762:                    }
0763:
0764:                case '\'':
0765:                    attrDelimiter = '\'';
0766:                    nextChar();
0767:
0768:                    for (;;) {
0769:                        switch (current) {
0770:                        case '\'':
0771:                            nextChar();
0772:                            return LexicalUnits.STRING;
0773:
0774:                        case '&':
0775:                            context = ATTRIBUTE_VALUE_CONTEXT;
0776:                            return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
0777:
0778:                        case '<':
0779:                            throw createXMLException("invalid.character");
0780:
0781:                        case -1:
0782:                            throw createXMLException("unexpected.eof");
0783:                        }
0784:                        nextChar();
0785:                    }
0786:
0787:                default:
0788:                    return readName(LexicalUnits.NAME);
0789:                }
0790:            }
0791:
0792:            /**
0793:             * Returns the next lexical unit in the context of an attribute value.
0794:             */
0795:            protected int nextInAttributeValue() throws IOException,
0796:                    XMLException {
0797:                if (current == -1) {
0798:                    return LexicalUnits.EOF;
0799:                }
0800:
0801:                if (current == '&') {
0802:                    return readReference();
0803:
0804:                } else {
0805:                    loop: for (;;) {
0806:                        switch (current) {
0807:                        case '&':
0808:                        case '<':
0809:                        case -1:
0810:                            break loop;
0811:                        case '"':
0812:                        case '\'':
0813:                            if (current == attrDelimiter) {
0814:                                break loop;
0815:                            }
0816:                        }
0817:                        nextChar();
0818:                    }
0819:
0820:                    switch (current) {
0821:                    case -1:
0822:                        break;
0823:
0824:                    case '<':
0825:                        throw createXMLException("invalid.character");
0826:
0827:                    case '&':
0828:                        return LexicalUnits.ATTRIBUTE_FRAGMENT;
0829:
0830:                    case '\'':
0831:                    case '"':
0832:                        nextChar();
0833:                        if (inDTD) {
0834:                            context = ATTLIST_CONTEXT;
0835:                        } else {
0836:                            context = START_TAG_CONTEXT;
0837:                        }
0838:                    }
0839:                    return LexicalUnits.LAST_ATTRIBUTE_FRAGMENT;
0840:                }
0841:            }
0842:
0843:            /**
0844:             * Returns the next lexical unit in the context of an element content.
0845:             */
0846:            protected int nextInContent() throws IOException, XMLException {
0847:                switch (current) {
0848:                case -1:
0849:                    return LexicalUnits.EOF;
0850:
0851:                case '&':
0852:                    return readReference();
0853:
0854:                case '<':
0855:                    switch (nextChar()) {
0856:                    case '?':
0857:                        context = PI_CONTEXT;
0858:                        return readPIStart();
0859:
0860:                    case '!':
0861:                        switch (nextChar()) {
0862:                        case '-':
0863:                            return readComment();
0864:                        case '[':
0865:                            context = CDATA_SECTION_CONTEXT;
0866:                            return readIdentifier("CDATA[",
0867:                                    LexicalUnits.CDATA_START, -1);
0868:                        default:
0869:                            throw createXMLException("invalid.character");
0870:                        }
0871:
0872:                    case '/':
0873:                        nextChar();
0874:                        context = END_TAG_CONTEXT;
0875:                        return readName(LexicalUnits.END_TAG);
0876:
0877:                    default:
0878:                        depth++;
0879:                        context = START_TAG_CONTEXT;
0880:                        return readName(LexicalUnits.START_TAG);
0881:                    }
0882:
0883:                default:
0884:                    loop: for (;;) {
0885:                        switch (current) {
0886:                        default:
0887:                            nextChar();
0888:                            break;
0889:
0890:                        case -1:
0891:                        case '&':
0892:                        case '<':
0893:                            break loop;
0894:                        }
0895:                    }
0896:                    return LexicalUnits.CHARACTER_DATA;
0897:                }
0898:            }
0899:
0900:            /**
0901:             * Returns the next lexical unit in the context of a end tag.
0902:             */
0903:            protected int nextInEndTag() throws IOException, XMLException {
0904:                switch (current) {
0905:                case 0x9:
0906:                case 0xA:
0907:                case 0xD:
0908:                case 0x20:
0909:                    do {
0910:                        nextChar();
0911:                    } while (current != -1
0912:                            && XMLUtilities.isXMLSpace((char) current));
0913:                    return LexicalUnits.S;
0914:
0915:                case '>':
0916:                    if (--depth < 0) {
0917:                        throw createXMLException("unexpected.end.tag");
0918:                    } else if (depth == 0) {
0919:                        context = TOP_LEVEL_CONTEXT;
0920:                    } else {
0921:                        context = CONTENT_CONTEXT;
0922:                    }
0923:                    nextChar();
0924:                    return LexicalUnits.END_CHAR;
0925:
0926:                default:
0927:                    throw createXMLException("invalid.character");
0928:                }
0929:            }
0930:
0931:            /**
0932:             * Returns the next lexical unit in the context of a CDATA section.
0933:             */
0934:            protected int nextInCDATASection() throws IOException, XMLException {
0935:                if (cdataEndRead) {
0936:                    cdataEndRead = false;
0937:                    context = CONTENT_CONTEXT;
0938:                    return LexicalUnits.SECTION_END;
0939:                }
0940:
0941:                while (current != -1) {
0942:                    while (current != ']' && current != -1) {
0943:                        nextChar();
0944:                    }
0945:                    if (current != -1) {
0946:                        nextChar();
0947:                        if (current == ']') {
0948:                            nextChar();
0949:                            if (current == '>') {
0950:                                break;
0951:                            }
0952:                        }
0953:                    }
0954:                }
0955:                if (current == -1) {
0956:                    throw createXMLException("unexpected.eof");
0957:                }
0958:                nextChar();
0959:                cdataEndRead = true;
0960:                return LexicalUnits.CHARACTER_DATA;
0961:            }
0962:
0963:            /**
0964:             * Returns the next lexical unit in the context of an XML declaration.
0965:             */
0966:            protected int nextInXMLDecl() throws IOException, XMLException {
0967:                switch (current) {
0968:                case 0x9:
0969:                case 0xA:
0970:                case 0xD:
0971:                case 0x20:
0972:                    do {
0973:                        nextChar();
0974:                    } while (current != -1
0975:                            && XMLUtilities.isXMLSpace((char) current));
0976:                    return LexicalUnits.S;
0977:                case 'v':
0978:                    return readIdentifier("ersion",
0979:                            LexicalUnits.VERSION_IDENTIFIER, -1);
0980:                case 'e':
0981:                    return readIdentifier("ncoding",
0982:                            LexicalUnits.ENCODING_IDENTIFIER, -1);
0983:                case 's':
0984:                    return readIdentifier("tandalone",
0985:                            LexicalUnits.STANDALONE_IDENTIFIER, -1);
0986:                case '=':
0987:                    nextChar();
0988:                    return LexicalUnits.EQ;
0989:
0990:                case '?':
0991:                    nextChar();
0992:                    if (current != '>') {
0993:                        throw createXMLException("pi.end.expected");
0994:                    }
0995:                    nextChar();
0996:                    context = TOP_LEVEL_CONTEXT;
0997:                    return LexicalUnits.PI_END;
0998:
0999:                case '"':
1000:                    attrDelimiter = '"';
1001:                    return readString();
1002:
1003:                case '\'':
1004:                    attrDelimiter = '\'';
1005:                    return readString();
1006:
1007:                default:
1008:                    throw createXMLException("invalid.character");
1009:                }
1010:            }
1011:
1012:            /**
1013:             * Returns the next lexical unit in the context of a doctype.
1014:             */
1015:            protected int nextInDoctype() throws IOException, XMLException {
1016:                switch (current) {
1017:                case 0x9:
1018:                case 0xA:
1019:                case 0xD:
1020:                case 0x20:
1021:                    do {
1022:                        nextChar();
1023:                    } while (current != -1
1024:                            && XMLUtilities.isXMLSpace((char) current));
1025:                    return LexicalUnits.S;
1026:
1027:                case '>':
1028:                    nextChar();
1029:                    context = TOP_LEVEL_CONTEXT;
1030:                    return LexicalUnits.END_CHAR;
1031:
1032:                case 'S':
1033:                    return readIdentifier("YSTEM",
1034:                            LexicalUnits.SYSTEM_IDENTIFIER, LexicalUnits.NAME);
1035:
1036:                case 'P':
1037:                    return readIdentifier("UBLIC",
1038:                            LexicalUnits.PUBLIC_IDENTIFIER, LexicalUnits.NAME);
1039:
1040:                case '"':
1041:                    attrDelimiter = '"';
1042:                    return readString();
1043:
1044:                case '\'':
1045:                    attrDelimiter = '\'';
1046:                    return readString();
1047:
1048:                case '[':
1049:                    nextChar();
1050:                    context = DTD_DECLARATIONS_CONTEXT;
1051:                    inDTD = true;
1052:                    return LexicalUnits.LSQUARE_BRACKET;
1053:
1054:                default:
1055:                    return readName(LexicalUnits.NAME);
1056:                }
1057:            }
1058:
1059:            /**
1060:             * Returns the next lexical unit in the context dtd declarations.
1061:             */
1062:            protected int nextInDTDDeclarations() throws IOException,
1063:                    XMLException {
1064:                switch (current) {
1065:                case 0x9:
1066:                case 0xA:
1067:                case 0xD:
1068:                case 0x20:
1069:                    do {
1070:                        nextChar();
1071:                    } while (current != -1
1072:                            && XMLUtilities.isXMLSpace((char) current));
1073:                    return LexicalUnits.S;
1074:
1075:                case ']':
1076:                    nextChar();
1077:                    context = DOCTYPE_CONTEXT;
1078:                    inDTD = false;
1079:                    return LexicalUnits.RSQUARE_BRACKET;
1080:
1081:                case '%':
1082:                    return readPEReference();
1083:
1084:                case '<':
1085:                    switch (nextChar()) {
1086:                    case '?':
1087:                        context = PI_CONTEXT;
1088:                        return readPIStart();
1089:
1090:                    case '!':
1091:                        switch (nextChar()) {
1092:                        case '-':
1093:                            return readComment();
1094:
1095:                        case 'E':
1096:                            switch (nextChar()) {
1097:                            case 'L':
1098:                                context = ELEMENT_DECLARATION_CONTEXT;
1099:                                return readIdentifier("EMENT",
1100:                                        LexicalUnits.ELEMENT_DECLARATION_START,
1101:                                        -1);
1102:                            case 'N':
1103:                                context = ENTITY_CONTEXT;
1104:                                return readIdentifier("TITY",
1105:                                        LexicalUnits.ENTITY_START, -1);
1106:                            default:
1107:                                throw createXMLException("invalid.character");
1108:                            }
1109:
1110:                        case 'A':
1111:                            context = ATTLIST_CONTEXT;
1112:                            return readIdentifier("TTLIST",
1113:                                    LexicalUnits.ATTLIST_START, -1);
1114:                        case 'N':
1115:                            context = NOTATION_CONTEXT;
1116:                            return readIdentifier("OTATION",
1117:                                    LexicalUnits.NOTATION_START, -1);
1118:                        default:
1119:                            throw createXMLException("invalid.character");
1120:                        }
1121:                    default:
1122:                        throw createXMLException("invalid.character");
1123:                    }
1124:                default:
1125:                    throw createXMLException("invalid.character");
1126:                }
1127:            }
1128:
1129:            /**
1130:             * Reads a simple string, like the ones used for version, encoding,
1131:             * public/system identifiers...
1132:             * The current character must be the string delimiter.
1133:             * @return type.
1134:             */
1135:            protected int readString() throws IOException, XMLException {
1136:                do {
1137:                    nextChar();
1138:                } while (current != -1 && current != attrDelimiter);
1139:                if (current == -1) {
1140:                    throw createXMLException("unexpected.eof");
1141:                }
1142:                nextChar();
1143:                return LexicalUnits.STRING;
1144:            }
1145:
1146:            /**
1147:             * Reads a comment. '&lt;!-' must have been read.
1148:             */
1149:            protected int readComment() throws IOException, XMLException {
1150:                if (nextChar() != '-') {
1151:                    throw createXMLException("malformed.comment");
1152:                }
1153:                int c = nextChar();
1154:                while (c != -1) {
1155:                    while (c != -1 && c != '-') {
1156:                        c = nextChar();
1157:                    }
1158:                    c = nextChar();
1159:                    if (c == '-') {
1160:                        break;
1161:                    }
1162:                }
1163:                if (c == -1) {
1164:                    throw createXMLException("unexpected.eof");
1165:                }
1166:                c = nextChar();
1167:                if (c != '>') {
1168:                    throw createXMLException("malformed.comment");
1169:                }
1170:                nextChar();
1171:                return LexicalUnits.COMMENT;
1172:            }
1173:
1174:            /**
1175:             * Reads the given identifier.
1176:             * @param s The portion of the identifier to read.
1177:             * @param type The lexical unit type of the identifier.
1178:             * @param ntype The lexical unit type to set if the identifier do not
1179:             * match or -1 if an error must be signaled.
1180:             */
1181:            protected int readIdentifier(String s, int type, int ntype)
1182:                    throws IOException, XMLException {
1183:                int len = s.length();
1184:                for (int i = 0; i < len; i++) {
1185:                    nextChar();
1186:                    if (current != s.charAt(i)) {
1187:                        if (ntype == -1) {
1188:                            throw createXMLException("invalid.character");
1189:                        } else {
1190:                            while (current != -1
1191:                                    && XMLUtilities
1192:                                            .isXMLNameCharacter((char) current)) {
1193:                                nextChar();
1194:                            }
1195:                            return ntype;
1196:                        }
1197:                    }
1198:                }
1199:                nextChar();
1200:                return type;
1201:            }
1202:
1203:            /**
1204:             * Reads a name. The current character must be the first character.
1205:             * @param type The lexical unit type to set.
1206:             * @return type.
1207:             */
1208:            protected int readName(int type) throws IOException, XMLException {
1209:                if (current == -1) {
1210:                    throw createXMLException("unexpected.eof");
1211:                }
1212:                if (!XMLUtilities.isXMLNameFirstCharacter((char) current)) {
1213:                    throw createXMLException("invalid.name");
1214:                }
1215:                do {
1216:                    nextChar();
1217:                } while (current != -1
1218:                        && XMLUtilities.isXMLNameCharacter((char) current));
1219:                return type;
1220:            }
1221:
1222:            /**
1223:             * Reads a processing instruction start.
1224:             * @return type.
1225:             */
1226:            protected int readPIStart() throws IOException, XMLException {
1227:                int c1 = nextChar();
1228:                if (c1 == -1) {
1229:                    throw createXMLException("unexpected.eof");
1230:                }
1231:                if (!XMLUtilities.isXMLNameFirstCharacter((char) current)) {
1232:                    throw createXMLException("malformed.pi.target");
1233:                }
1234:                int c2 = nextChar();
1235:                if (c2 == -1 || !XMLUtilities.isXMLNameCharacter((char) c2)) {
1236:                    return LexicalUnits.PI_START;
1237:                }
1238:                int c3 = nextChar();
1239:                if (c3 == -1 || !XMLUtilities.isXMLNameCharacter((char) c3)) {
1240:                    return LexicalUnits.PI_START;
1241:                }
1242:                int c4 = nextChar();
1243:                if (c4 != -1 && XMLUtilities.isXMLNameCharacter((char) c4)) {
1244:                    do {
1245:                        nextChar();
1246:                    } while (current != -1
1247:                            && XMLUtilities.isXMLNameCharacter((char) current));
1248:                    return LexicalUnits.PI_START;
1249:                }
1250:                if ((c1 == 'x' || c1 == 'X') && (c2 == 'm' || c2 == 'M')
1251:                        && (c3 == 'l' || c3 == 'L')) {
1252:                    throw createXMLException("xml.reserved");
1253:                }
1254:                return LexicalUnits.PI_START;
1255:            }
1256:
1257:            /**
1258:             * Returns the next lexical unit in the context of a element declaration.
1259:             */
1260:            protected int nextInElementDeclaration() throws IOException,
1261:                    XMLException {
1262:                switch (current) {
1263:                case 0x9:
1264:                case 0xA:
1265:                case 0xD:
1266:                case 0x20:
1267:                    do {
1268:                        nextChar();
1269:                    } while (current != -1
1270:                            && XMLUtilities.isXMLSpace((char) current));
1271:                    return LexicalUnits.S;
1272:
1273:                case '>':
1274:                    nextChar();
1275:                    context = DTD_DECLARATIONS_CONTEXT;
1276:                    return LexicalUnits.END_CHAR;
1277:
1278:                case '%':
1279:                    nextChar();
1280:                    int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1281:                    if (current != ';') {
1282:                        throw createXMLException("malformed.parameter.entity");
1283:                    }
1284:                    nextChar();
1285:                    return t;
1286:
1287:                case 'E':
1288:                    return readIdentifier("MPTY",
1289:                            LexicalUnits.EMPTY_IDENTIFIER, LexicalUnits.NAME);
1290:
1291:                case 'A':
1292:                    return readIdentifier("NY", LexicalUnits.ANY_IDENTIFIER,
1293:                            LexicalUnits.NAME);
1294:
1295:                case '?':
1296:                    nextChar();
1297:                    return LexicalUnits.QUESTION;
1298:
1299:                case '+':
1300:                    nextChar();
1301:                    return LexicalUnits.PLUS;
1302:
1303:                case '*':
1304:                    nextChar();
1305:                    return LexicalUnits.STAR;
1306:
1307:                case '(':
1308:                    nextChar();
1309:                    return LexicalUnits.LEFT_BRACE;
1310:
1311:                case ')':
1312:                    nextChar();
1313:                    return LexicalUnits.RIGHT_BRACE;
1314:
1315:                case '|':
1316:                    nextChar();
1317:                    return LexicalUnits.PIPE;
1318:
1319:                case ',':
1320:                    nextChar();
1321:                    return LexicalUnits.COMMA;
1322:
1323:                case '#':
1324:                    return readIdentifier("PCDATA",
1325:                            LexicalUnits.PCDATA_IDENTIFIER, -1);
1326:
1327:                default:
1328:                    return readName(LexicalUnits.NAME);
1329:                }
1330:            }
1331:
1332:            /**
1333:             * Returns the next lexical unit in the context of an attribute list.
1334:             */
1335:            protected int nextInAttList() throws IOException, XMLException {
1336:                switch (current) {
1337:                case 0x9:
1338:                case 0xA:
1339:                case 0xD:
1340:                case 0x20:
1341:                    do {
1342:                        nextChar();
1343:                    } while (current != -1
1344:                            && XMLUtilities.isXMLSpace((char) current));
1345:                    return LexicalUnits.S;
1346:
1347:                case '>':
1348:                    nextChar();
1349:                    context = DTD_DECLARATIONS_CONTEXT;
1350:                    return type = LexicalUnits.END_CHAR;
1351:
1352:                case '%':
1353:                    int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1354:                    if (current != ';') {
1355:                        throw createXMLException("malformed.parameter.entity");
1356:                    }
1357:                    nextChar();
1358:                    return t;
1359:
1360:                case 'C':
1361:                    return readIdentifier("DATA",
1362:                            LexicalUnits.CDATA_IDENTIFIER, LexicalUnits.NAME);
1363:
1364:                case 'I':
1365:                    nextChar();
1366:                    if (current != 'D') {
1367:                        do {
1368:                            nextChar();
1369:                        } while (current != -1
1370:                                && XMLUtilities
1371:                                        .isXMLNameCharacter((char) current));
1372:                        return LexicalUnits.NAME;
1373:                    }
1374:                    nextChar();
1375:                    if (current == -1
1376:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1377:                        return LexicalUnits.ID_IDENTIFIER;
1378:                    }
1379:                    if (current != 'R') {
1380:                        do {
1381:                            nextChar();
1382:                        } while (current != -1
1383:                                && XMLUtilities
1384:                                        .isXMLNameCharacter((char) current));
1385:                        return LexicalUnits.NAME;
1386:                    }
1387:                    nextChar();
1388:                    if (current == -1
1389:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1390:                        return LexicalUnits.NAME;
1391:                    }
1392:                    if (current != 'E') {
1393:                        do {
1394:                            nextChar();
1395:                        } while (current != -1
1396:                                && XMLUtilities
1397:                                        .isXMLNameCharacter((char) current));
1398:                        return LexicalUnits.NAME;
1399:                    }
1400:                    nextChar();
1401:                    if (current == -1
1402:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1403:                        return LexicalUnits.NAME;
1404:                    }
1405:                    if (current != 'F') {
1406:                        do {
1407:                            nextChar();
1408:                        } while (current != -1
1409:                                && XMLUtilities
1410:                                        .isXMLNameCharacter((char) current));
1411:                        return LexicalUnits.NAME;
1412:                    }
1413:                    nextChar();
1414:                    if (current == -1
1415:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1416:                        return LexicalUnits.IDREF_IDENTIFIER;
1417:                    }
1418:                    if (current != 'S') {
1419:                        do {
1420:                            nextChar();
1421:                        } while (current != -1
1422:                                && XMLUtilities
1423:                                        .isXMLNameCharacter((char) current));
1424:                        return LexicalUnits.NAME;
1425:                    }
1426:                    nextChar();
1427:                    if (current == -1
1428:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1429:                        return LexicalUnits.IDREFS_IDENTIFIER;
1430:                    }
1431:                    do {
1432:                        nextChar();
1433:                    } while (current != -1
1434:                            && XMLUtilities.isXMLNameCharacter((char) current));
1435:                    return type = LexicalUnits.NAME;
1436:
1437:                case 'N':
1438:                    switch (nextChar()) {
1439:                    default:
1440:                        do {
1441:                            nextChar();
1442:                        } while (current != -1
1443:                                && XMLUtilities
1444:                                        .isXMLNameCharacter((char) current));
1445:                        return LexicalUnits.NAME;
1446:
1447:                    case 'O':
1448:                        context = NOTATION_TYPE_CONTEXT;
1449:                        return readIdentifier("TATION",
1450:                                LexicalUnits.NOTATION_IDENTIFIER,
1451:                                LexicalUnits.NAME);
1452:
1453:                    case 'M':
1454:                        nextChar();
1455:                        if (current == -1
1456:                                || !XMLUtilities
1457:                                        .isXMLNameCharacter((char) current)) {
1458:                            return LexicalUnits.NAME;
1459:                        }
1460:                        if (current != 'T') {
1461:                            do {
1462:                                nextChar();
1463:                            } while (current != -1
1464:                                    && XMLUtilities
1465:                                            .isXMLNameCharacter((char) current));
1466:                            return LexicalUnits.NAME;
1467:                        }
1468:                        nextChar();
1469:                        if (current == -1
1470:                                || !XMLUtilities
1471:                                        .isXMLNameCharacter((char) current)) {
1472:                            return LexicalUnits.NAME;
1473:                        }
1474:                        if (current != 'O') {
1475:                            do {
1476:                                nextChar();
1477:                            } while (current != -1
1478:                                    && XMLUtilities
1479:                                            .isXMLNameCharacter((char) current));
1480:                            return LexicalUnits.NAME;
1481:                        }
1482:                        nextChar();
1483:                        if (current == -1
1484:                                || !XMLUtilities
1485:                                        .isXMLNameCharacter((char) current)) {
1486:                            return LexicalUnits.NAME;
1487:                        }
1488:                        if (current != 'K') {
1489:                            do {
1490:                                nextChar();
1491:                            } while (current != -1
1492:                                    && XMLUtilities
1493:                                            .isXMLNameCharacter((char) current));
1494:                            return LexicalUnits.NAME;
1495:                        }
1496:                        nextChar();
1497:                        if (current == -1
1498:                                || !XMLUtilities
1499:                                        .isXMLNameCharacter((char) current)) {
1500:                            return LexicalUnits.NAME;
1501:                        }
1502:                        if (current != 'E') {
1503:                            do {
1504:                                nextChar();
1505:                            } while (current != -1
1506:                                    && XMLUtilities
1507:                                            .isXMLNameCharacter((char) current));
1508:                            return LexicalUnits.NAME;
1509:                        }
1510:                        nextChar();
1511:                        if (current == -1
1512:                                || !XMLUtilities
1513:                                        .isXMLNameCharacter((char) current)) {
1514:                            return LexicalUnits.NAME;
1515:                        }
1516:                        if (current != 'N') {
1517:                            do {
1518:                                nextChar();
1519:                            } while (current != -1
1520:                                    && XMLUtilities
1521:                                            .isXMLNameCharacter((char) current));
1522:                            return LexicalUnits.NAME;
1523:                        }
1524:                        nextChar();
1525:                        if (current == -1
1526:                                || !XMLUtilities
1527:                                        .isXMLNameCharacter((char) current)) {
1528:                            return LexicalUnits.NMTOKEN_IDENTIFIER;
1529:                        }
1530:                        if (current != 'S') {
1531:                            do {
1532:                                nextChar();
1533:                            } while (current != -1
1534:                                    && XMLUtilities
1535:                                            .isXMLNameCharacter((char) current));
1536:                            return LexicalUnits.NAME;
1537:                        }
1538:                        nextChar();
1539:                        if (current == -1
1540:                                || !XMLUtilities
1541:                                        .isXMLNameCharacter((char) current)) {
1542:                            return LexicalUnits.NMTOKENS_IDENTIFIER;
1543:                        }
1544:                        do {
1545:                            nextChar();
1546:                        } while (current != -1
1547:                                && XMLUtilities
1548:                                        .isXMLNameCharacter((char) current));
1549:                        return LexicalUnits.NAME;
1550:                    }
1551:
1552:                case 'E':
1553:                    nextChar();
1554:                    if (current != 'N') {
1555:                        do {
1556:                            nextChar();
1557:                        } while (current != -1
1558:                                && XMLUtilities
1559:                                        .isXMLNameCharacter((char) current));
1560:                        return LexicalUnits.NAME;
1561:                    }
1562:                    nextChar();
1563:                    if (current == -1
1564:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1565:                        return LexicalUnits.NAME;
1566:                    }
1567:                    if (current != 'T') {
1568:                        do {
1569:                            nextChar();
1570:                        } while (current != -1
1571:                                && XMLUtilities
1572:                                        .isXMLNameCharacter((char) current));
1573:                        return LexicalUnits.NAME;
1574:                    }
1575:                    nextChar();
1576:                    if (current == -1
1577:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1578:                        return LexicalUnits.NAME;
1579:                    }
1580:                    if (current != 'I') {
1581:                        do {
1582:                            nextChar();
1583:                        } while (current != -1
1584:                                && XMLUtilities
1585:                                        .isXMLNameCharacter((char) current));
1586:                        return LexicalUnits.NAME;
1587:                    }
1588:                    nextChar();
1589:                    if (current == -1
1590:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1591:                        return LexicalUnits.NAME;
1592:                    }
1593:                    if (current != 'T') {
1594:                        do {
1595:                            nextChar();
1596:                        } while (current != -1
1597:                                && XMLUtilities
1598:                                        .isXMLNameCharacter((char) current));
1599:                        return type = LexicalUnits.NAME;
1600:                    }
1601:                    nextChar();
1602:                    if (current == -1
1603:                            || !XMLUtilities.isXMLNameCharacter((char) current)) {
1604:                        return LexicalUnits.NAME;
1605:                    }
1606:                    switch (current) {
1607:                    case 'Y':
1608:                        nextChar();
1609:                        if (current == -1
1610:                                || !XMLUtilities
1611:                                        .isXMLNameCharacter((char) current)) {
1612:                            return LexicalUnits.ENTITY_IDENTIFIER;
1613:                        }
1614:                        do {
1615:                            nextChar();
1616:                        } while (current != -1
1617:                                && XMLUtilities
1618:                                        .isXMLNameCharacter((char) current));
1619:                        return LexicalUnits.NAME;
1620:                    case 'I':
1621:                        nextChar();
1622:                        if (current == -1
1623:                                || !XMLUtilities
1624:                                        .isXMLNameCharacter((char) current)) {
1625:                            return LexicalUnits.NAME;
1626:                        }
1627:                        if (current != 'E') {
1628:                            do {
1629:                                nextChar();
1630:                            } while (current != -1
1631:                                    && XMLUtilities
1632:                                            .isXMLNameCharacter((char) current));
1633:                            return LexicalUnits.NAME;
1634:                        }
1635:                        nextChar();
1636:                        if (current == -1
1637:                                || !XMLUtilities
1638:                                        .isXMLNameCharacter((char) current)) {
1639:                            return LexicalUnits.NAME;
1640:                        }
1641:                        if (current != 'S') {
1642:                            do {
1643:                                nextChar();
1644:                            } while (current != -1
1645:                                    && XMLUtilities
1646:                                            .isXMLNameCharacter((char) current));
1647:                            return LexicalUnits.NAME;
1648:                        }
1649:                        return LexicalUnits.ENTITIES_IDENTIFIER;
1650:
1651:                    default:
1652:                        if (current == -1
1653:                                || !XMLUtilities
1654:                                        .isXMLNameCharacter((char) current)) {
1655:                            return LexicalUnits.NAME;
1656:                        }
1657:                        do {
1658:                            nextChar();
1659:                        } while (current != -1
1660:                                && XMLUtilities
1661:                                        .isXMLNameCharacter((char) current));
1662:                        return LexicalUnits.NAME;
1663:                    }
1664:
1665:                case '"':
1666:                    attrDelimiter = '"';
1667:                    nextChar();
1668:                    if (current == -1) {
1669:                        throw createXMLException("unexpected.eof");
1670:                    }
1671:                    if (current != '"' && current != '&') {
1672:                        do {
1673:                            nextChar();
1674:                        } while (current != -1 && current != '"'
1675:                                && current != '&');
1676:                    }
1677:                    switch (current) {
1678:                    case '&':
1679:                        context = ATTRIBUTE_VALUE_CONTEXT;
1680:                        return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1681:
1682:                    case '"':
1683:                        nextChar();
1684:                        return LexicalUnits.STRING;
1685:
1686:                    default:
1687:                        throw createXMLException("invalid.character");
1688:                    }
1689:
1690:                case '\'':
1691:                    attrDelimiter = '\'';
1692:                    nextChar();
1693:                    if (current == -1) {
1694:                        throw createXMLException("unexpected.eof");
1695:                    }
1696:                    if (current != '\'' && current != '&') {
1697:                        do {
1698:                            nextChar();
1699:                        } while (current != -1 && current != '\''
1700:                                && current != '&');
1701:                    }
1702:                    switch (current) {
1703:                    case '&':
1704:                        context = ATTRIBUTE_VALUE_CONTEXT;
1705:                        return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1706:
1707:                    case '\'':
1708:                        nextChar();
1709:                        return LexicalUnits.STRING;
1710:
1711:                    default:
1712:                        throw createXMLException("invalid.character");
1713:                    }
1714:
1715:                case '#':
1716:                    switch (nextChar()) {
1717:                    case 'R':
1718:                        return readIdentifier("EQUIRED",
1719:                                LexicalUnits.REQUIRED_IDENTIFIER, -1);
1720:
1721:                    case 'I':
1722:                        return readIdentifier("MPLIED",
1723:                                LexicalUnits.IMPLIED_IDENTIFIER, -1);
1724:                    case 'F':
1725:                        return readIdentifier("IXED",
1726:                                LexicalUnits.FIXED_IDENTIFIER, -1);
1727:                    default:
1728:                        throw createXMLException("invalid.character");
1729:                    }
1730:
1731:                case '(':
1732:                    nextChar();
1733:                    context = ENUMERATION_CONTEXT;
1734:                    return LexicalUnits.LEFT_BRACE;
1735:
1736:                default:
1737:                    return readName(LexicalUnits.NAME);
1738:                }
1739:            }
1740:
1741:            /**
1742:             * Returns the next lexical unit in the context of a notation.
1743:             */
1744:            protected int nextInNotation() throws IOException, XMLException {
1745:                switch (current) {
1746:                case 0x9:
1747:                case 0xA:
1748:                case 0xD:
1749:                case 0x20:
1750:                    do {
1751:                        nextChar();
1752:                    } while (current != -1
1753:                            && XMLUtilities.isXMLSpace((char) current));
1754:                    return LexicalUnits.S;
1755:
1756:                case '>':
1757:                    nextChar();
1758:                    context = DTD_DECLARATIONS_CONTEXT;
1759:                    return LexicalUnits.END_CHAR;
1760:
1761:                case '%':
1762:                    int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1763:                    if (current != ';') {
1764:                        throw createXMLException("malformed.parameter.entity");
1765:                    }
1766:                    nextChar();
1767:                    return t;
1768:                case 'S':
1769:                    return readIdentifier("YSTEM",
1770:                            LexicalUnits.SYSTEM_IDENTIFIER, LexicalUnits.NAME);
1771:
1772:                case 'P':
1773:                    return readIdentifier("UBLIC",
1774:                            LexicalUnits.PUBLIC_IDENTIFIER, LexicalUnits.NAME);
1775:
1776:                case '"':
1777:                    attrDelimiter = '"';
1778:                    return readString();
1779:
1780:                case '\'':
1781:                    attrDelimiter = '\'';
1782:                    return readString();
1783:
1784:                default:
1785:                    return readName(LexicalUnits.NAME);
1786:                }
1787:            }
1788:
1789:            /**
1790:             * Returns the next lexical unit in the context of an entity.
1791:             */
1792:            protected int nextInEntity() throws IOException, XMLException {
1793:                switch (current) {
1794:                case 0x9:
1795:                case 0xA:
1796:                case 0xD:
1797:                case 0x20:
1798:                    do {
1799:                        nextChar();
1800:                    } while (current != -1
1801:                            && XMLUtilities.isXMLSpace((char) current));
1802:                    return LexicalUnits.S;
1803:
1804:                case '>':
1805:                    nextChar();
1806:                    context = DTD_DECLARATIONS_CONTEXT;
1807:                    return LexicalUnits.END_CHAR;
1808:
1809:                case '%':
1810:                    nextChar();
1811:                    return LexicalUnits.PERCENT;
1812:
1813:                case 'S':
1814:                    return readIdentifier("YSTEM",
1815:                            LexicalUnits.SYSTEM_IDENTIFIER, LexicalUnits.NAME);
1816:
1817:                case 'P':
1818:                    return readIdentifier("UBLIC",
1819:                            LexicalUnits.PUBLIC_IDENTIFIER, LexicalUnits.NAME);
1820:
1821:                case 'N':
1822:                    return readIdentifier("DATA",
1823:                            LexicalUnits.NDATA_IDENTIFIER, LexicalUnits.NAME);
1824:
1825:                case '"':
1826:                    attrDelimiter = '"';
1827:                    nextChar();
1828:                    if (current == -1) {
1829:                        throw createXMLException("unexpected.eof");
1830:                    }
1831:
1832:                    if (current != '"' && current != '&' && current != '%') {
1833:                        do {
1834:                            nextChar();
1835:                        } while (current != -1 && current != '"'
1836:                                && current != '&' && current != '%');
1837:                    }
1838:                    switch (current) {
1839:                    default:
1840:                        throw createXMLException("invalid.character");
1841:
1842:                    case '&':
1843:                    case '%':
1844:                        context = ENTITY_VALUE_CONTEXT;
1845:                        break;
1846:
1847:                    case '"':
1848:                        nextChar();
1849:                        return LexicalUnits.STRING;
1850:                    }
1851:                    return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1852:
1853:                case '\'':
1854:                    attrDelimiter = '\'';
1855:                    nextChar();
1856:                    if (current == -1) {
1857:                        throw createXMLException("unexpected.eof");
1858:                    }
1859:
1860:                    if (current != '\'' && current != '&' && current != '%') {
1861:                        do {
1862:                            nextChar();
1863:                        } while (current != -1 && current != '\''
1864:                                && current != '&' && current != '%');
1865:                    }
1866:                    switch (current) {
1867:                    default:
1868:                        throw createXMLException("invalid.character");
1869:
1870:                    case '&':
1871:                    case '%':
1872:                        context = ENTITY_VALUE_CONTEXT;
1873:                        break;
1874:
1875:                    case '\'':
1876:                        nextChar();
1877:                        return LexicalUnits.STRING;
1878:                    }
1879:                    return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1880:
1881:                default:
1882:                    return readName(LexicalUnits.NAME);
1883:                }
1884:            }
1885:
1886:            /**
1887:             * Returns the next lexical unit in the context of an entity value.
1888:             */
1889:            protected int nextInEntityValue() throws IOException, XMLException {
1890:                switch (current) {
1891:                case '&':
1892:                    return readReference();
1893:
1894:                case '%':
1895:                    int t = nextChar();
1896:                    readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1897:                    if (current != ';') {
1898:                        throw createXMLException("invalid.parameter.entity");
1899:                    }
1900:                    nextChar();
1901:                    return t;
1902:
1903:                default:
1904:                    while (current != -1 && current != attrDelimiter
1905:                            && current != '&' && current != '%') {
1906:                        nextChar();
1907:                    }
1908:                    switch (current) {
1909:                    case -1:
1910:                        throw createXMLException("unexpected.eof");
1911:
1912:                    case '\'':
1913:                    case '"':
1914:                        nextChar();
1915:                        context = ENTITY_CONTEXT;
1916:                        return LexicalUnits.STRING;
1917:                    }
1918:                    return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1919:                }
1920:            }
1921:
1922:            /**
1923:             * Returns the next lexical unit in the context of a notation type.
1924:             */
1925:            protected int nextInNotationType() throws IOException, XMLException {
1926:                switch (current) {
1927:                case 0x9:
1928:                case 0xA:
1929:                case 0xD:
1930:                case 0x20:
1931:                    do {
1932:                        nextChar();
1933:                    } while (current != -1
1934:                            && XMLUtilities.isXMLSpace((char) current));
1935:                    return LexicalUnits.S;
1936:
1937:                case '|':
1938:                    nextChar();
1939:                    return LexicalUnits.PIPE;
1940:
1941:                case '(':
1942:                    nextChar();
1943:                    return LexicalUnits.LEFT_BRACE;
1944:
1945:                case ')':
1946:                    nextChar();
1947:                    context = ATTLIST_CONTEXT;
1948:                    return LexicalUnits.RIGHT_BRACE;
1949:
1950:                default:
1951:                    return readName(LexicalUnits.NAME);
1952:                }
1953:            }
1954:
1955:            /**
1956:             * Returns the next lexical unit in the context of an enumeration.
1957:             */
1958:            protected int nextInEnumeration() throws IOException, XMLException {
1959:                switch (current) {
1960:                case 0x9:
1961:                case 0xA:
1962:                case 0xD:
1963:                case 0x20:
1964:                    do {
1965:                        nextChar();
1966:                    } while (current != -1
1967:                            && XMLUtilities.isXMLSpace((char) current));
1968:                    return LexicalUnits.S;
1969:
1970:                case '|':
1971:                    nextChar();
1972:                    return LexicalUnits.PIPE;
1973:
1974:                case ')':
1975:                    nextChar();
1976:                    context = ATTLIST_CONTEXT;
1977:                    return LexicalUnits.RIGHT_BRACE;
1978:
1979:                default:
1980:                    return readNmtoken();
1981:                }
1982:            }
1983:
1984:            /**
1985:             * Reads an entity or character reference. The current character
1986:             * must be '&amp;'.
1987:             * @return type.
1988:             */
1989:            protected int readReference() throws IOException, XMLException {
1990:                nextChar();
1991:                if (current == '#') {
1992:                    nextChar();
1993:                    int i = 0;
1994:                    switch (current) {
1995:                    case 'x':
1996:                        do {
1997:                            i++;
1998:                            nextChar();
1999:                        } while ((current >= '0' && current <= '9')
2000:                                || (current >= 'a' && current <= 'f')
2001:                                || (current >= 'A' && current <= 'F'));
2002:                        break;
2003:
2004:                    default:
2005:                        do {
2006:                            i++;
2007:                            nextChar();
2008:                        } while (current >= '0' && current <= '9');
2009:                        break;
2010:
2011:                    case -1:
2012:                        throw createXMLException("unexpected.eof");
2013:                    }
2014:                    if (i == 1 || current != ';') {
2015:                        throw createXMLException("character.reference");
2016:                    }
2017:                    nextChar();
2018:                    return LexicalUnits.CHARACTER_REFERENCE;
2019:                } else {
2020:                    int t = readName(LexicalUnits.ENTITY_REFERENCE);
2021:                    if (current != ';') {
2022:                        throw createXMLException("character.reference");
2023:                    }
2024:                    nextChar();
2025:                    return t;
2026:                }
2027:            }
2028:
2029:            /**
2030:             * Reads a parameter entity reference. The current character must be '%'.
2031:             * @return type.
2032:             */
2033:            protected int readPEReference() throws IOException, XMLException {
2034:                nextChar();
2035:                if (current == -1) {
2036:                    throw createXMLException("unexpected.eof");
2037:                }
2038:                if (!XMLUtilities.isXMLNameFirstCharacter((char) current)) {
2039:                    throw createXMLException("invalid.parameter.entity");
2040:                }
2041:                do {
2042:                    nextChar();
2043:                } while (current != -1
2044:                        && XMLUtilities.isXMLNameCharacter((char) current));
2045:                if (current != ';') {
2046:                    throw createXMLException("invalid.parameter.entity");
2047:                }
2048:                nextChar();
2049:                return LexicalUnits.PARAMETER_ENTITY_REFERENCE;
2050:            }
2051:
2052:            /**
2053:             * Reads a Nmtoken. The current character must be the first character.
2054:             * @return LexicalUnits.NMTOKEN.
2055:             */
2056:            protected int readNmtoken() throws IOException, XMLException {
2057:                if (current == -1) {
2058:                    throw createXMLException("unexpected.eof");
2059:                }
2060:                while (XMLUtilities.isXMLNameCharacter((char) current)) {
2061:                    nextChar();
2062:                }
2063:                return LexicalUnits.NMTOKEN;
2064:            }
2065:
2066:            /**
2067:             * Sets the value of the current char to the next character or -1 if the
2068:             * end of stream has been reached.
2069:             */
2070:            protected int nextChar() throws IOException {
2071:                current = reader.read();
2072:
2073:                if (current == -1) {
2074:                    return current;
2075:                }
2076:
2077:                if (position == buffer.length) {
2078:                    char[] t = new char[1 + position + position / 2];
2079:                    System.arraycopy(buffer, 0, t, 0, position);
2080:                    buffer = t;
2081:                }
2082:
2083:                return buffer[position++] = (char) current;
2084:            }
2085:
2086:            /**
2087:             * Returns an XMLException initialized with the given message key.
2088:             */
2089:            protected XMLException createXMLException(String message) {
2090:                String m;
2091:                try {
2092:                    m = formatMessage(message, new Object[] {
2093:                            new Integer(reader.getLine()),
2094:                            new Integer(reader.getColumn()) });
2095:                } catch (MissingResourceException e) {
2096:                    m = message;
2097:                }
2098:                return new XMLException(m);
2099:            }
2100:
2101:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.