0001: /*
0002:
0003: Licensed to the Apache Software Foundation (ASF) under one or more
0004: contributor license agreements. See the NOTICE file distributed with
0005: this work for additional information regarding copyright ownership.
0006: The ASF licenses this file to You under the Apache License, Version 2.0
0007: (the "License"); you may not use this file except in compliance with
0008: the License. You may obtain a copy of the License at
0009:
0010: http://www.apache.org/licenses/LICENSE-2.0
0011:
0012: Unless required by applicable law or agreed to in writing, software
0013: distributed under the License is distributed on an "AS IS" BASIS,
0014: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0015: See the License for the specific language governing permissions and
0016: limitations under the License.
0017:
0018: */
0019: package org.apache.batik.xml;
0020:
0021: import java.io.IOException;
0022: import java.io.InputStream;
0023: import java.io.Reader;
0024: import java.util.Locale;
0025: import java.util.MissingResourceException;
0026:
0027: import org.apache.batik.i18n.Localizable;
0028: import org.apache.batik.i18n.LocalizableSupport;
0029: import org.apache.batik.util.io.NormalizingReader;
0030: import org.apache.batik.util.io.StreamNormalizingReader;
0031: import org.apache.batik.util.io.StringNormalizingReader;
0032:
0033: /**
0034: * This class represents a scanner for XML documents.
0035: *
0036: * @author <a href="mailto:stephane@hillion.org">Stephane Hillion</a>
0037: * @version $Id: XMLScanner.java 489226 2006-12-21 00:05:36Z cam $
0038: */
0039: public class XMLScanner implements Localizable {
0040:
/**
 * The document start context. This is the context every constructor
 * of this class installs; {@link #next(int)} dispatches it to
 * <code>nextInDocumentStart()</code>.
 */
public static final int DOCUMENT_START_CONTEXT = 0;

/**
 * The top level context, dispatched to <code>nextInTopLevel()</code>.
 * The scanning methods switch to it when the depth is 0.
 */
public static final int TOP_LEVEL_CONTEXT = 1;

/**
 * The processing instruction context, dispatched to
 * <code>nextInPI()</code>.
 */
public static final int PI_CONTEXT = 2;

/**
 * The XML declaration context, dispatched to
 * <code>nextInXMLDecl()</code>.
 */
public static final int XML_DECL_CONTEXT = 3;

/**
 * The doctype context, dispatched to <code>nextInDoctype()</code>.
 */
public static final int DOCTYPE_CONTEXT = 4;

/**
 * The start tag context, dispatched to <code>nextInStartTag()</code>.
 */
public static final int START_TAG_CONTEXT = 5;

/**
 * The element content context, dispatched to
 * <code>nextInContent()</code>.
 */
public static final int CONTENT_CONTEXT = 6;

/**
 * The DTD declarations context, dispatched to
 * <code>nextInDTDDeclarations()</code>. It is entered when '[' is
 * read in the doctype context.
 */
public static final int DTD_DECLARATIONS_CONTEXT = 7;

/**
 * The CDATA section context, dispatched to
 * <code>nextInCDATASection()</code>.
 */
public static final int CDATA_SECTION_CONTEXT = 8;

/**
 * The end tag context, dispatched to <code>nextInEndTag()</code>.
 */
public static final int END_TAG_CONTEXT = 9;

/**
 * The attribute value context, dispatched to
 * <code>nextInAttributeValue()</code>. It is entered when a '&amp;'
 * is met inside a quoted attribute value.
 */
public static final int ATTRIBUTE_VALUE_CONTEXT = 10;

/**
 * The ATTLIST context, dispatched to <code>nextInAttList()</code>.
 */
public static final int ATTLIST_CONTEXT = 11;

/**
 * The element declaration context, dispatched to
 * <code>nextInElementDeclaration()</code>.
 */
public static final int ELEMENT_DECLARATION_CONTEXT = 12;

/**
 * The entity context, dispatched to <code>nextInEntity()</code>.
 */
public static final int ENTITY_CONTEXT = 13;

/**
 * The notation context, dispatched to <code>nextInNotation()</code>.
 */
public static final int NOTATION_CONTEXT = 14;

/**
 * The notation type context, dispatched to
 * <code>nextInNotationType()</code>.
 */
public static final int NOTATION_TYPE_CONTEXT = 15;

/**
 * The enumeration context, dispatched to
 * <code>nextInEnumeration()</code>.
 */
public static final int ENUMERATION_CONTEXT = 16;

/**
 * The entity value context, dispatched to
 * <code>nextInEntityValue()</code>.
 */
public static final int ENTITY_VALUE_CONTEXT = 17;
0130:
/**
 * The default resource bundle base name, handed to the
 * {@link LocalizableSupport} created below.
 */
protected static final String BUNDLE_CLASSNAME = "org.apache.batik.xml.resources.Messages";

/**
 * The localizable support. The {@link Localizable} methods of this
 * class delegate to it.
 */
protected LocalizableSupport localizableSupport = new LocalizableSupport(
BUNDLE_CLASSNAME, XMLScanner.class.getClassLoader());

/**
 * The reader the characters are taken from. It is also the source of
 * the line and column numbers reported by this scanner.
 */
protected NormalizingReader reader;

/**
 * The current char. The scanning methods compare it with -1 to detect
 * the end of the stream.
 */
protected int current;

/**
 * The type of the current lexical unit, as returned by
 * {@link #getType()}.
 */
protected int type;

/**
 * The recording buffer. Its initial capacity is 1024 chars.
 */
protected char[] buffer = new char[1024];

/**
 * The current position in the buffer.
 */
protected int position;

/**
 * The start offset of the last lexical unit. {@link #next(int)} sets
 * it to <code>position - 1</code> before scanning.
 */
protected int start;

/**
 * The end offset of the last lexical unit. {@link #next(int)} updates
 * it after a unit has been scanned.
 */
protected int end;

/**
 * The current scanning context: one of the <code>*_CONTEXT</code>
 * constants of this class.
 */
protected int context;

/**
 * The depth in the xml tree. It is incremented when a start tag is
 * read and decremented when an element is closed.
 */
protected int depth;

/**
 * A PI end has been previously read: the "?&gt;" was consumed together
 * with the PI data, so the next call made in the PI context must
 * report the PI end without reading anything.
 */
protected boolean piEndRead;

/**
 * The scanner is in the internal DTD. Set when '[' is read in the
 * doctype context and cleared when the matching ']' is read.
 */
protected boolean inDTD;

/**
 * The last attribute delimiter encountered (a single or a double
 * quote).
 */
protected char attrDelimiter;

/**
 * A CDATA section end is the next token: the "]]&gt;" was consumed
 * together with the section's character data.
 */
protected boolean cdataEndRead;
0206:
/**
 * Builds a scanner reading from a character stream. The scanner starts
 * in {@link #DOCUMENT_START_CONTEXT} and immediately reads the first
 * character of the input.
 * @param r The reader to scan.
 * @throws XMLException if an I/O error occurs while the first
 *         character is read.
 */
public XMLScanner(Reader r) throws XMLException {
    this.context = DOCUMENT_START_CONTEXT;
    try {
        this.reader = new StreamNormalizingReader(r);
        this.current = nextChar();
    } catch (IOException ioe) {
        throw new XMLException(ioe);
    }
}
0220:
/**
 * Builds a scanner reading from a byte stream. The scanner starts in
 * {@link #DOCUMENT_START_CONTEXT} and immediately reads the first
 * character of the input.
 * @param is The input stream to scan.
 * @param enc The character encoding to use.
 * @throws XMLException if an I/O error occurs while the reader is
 *         created or the first character is read.
 */
public XMLScanner(InputStream is, String enc) throws XMLException {
    this.context = DOCUMENT_START_CONTEXT;
    try {
        this.reader = new StreamNormalizingReader(is, enc);
        this.current = nextChar();
    } catch (IOException ioe) {
        throw new XMLException(ioe);
    }
}
0235:
/**
 * Builds a scanner reading from a string. The scanner starts in
 * {@link #DOCUMENT_START_CONTEXT} and immediately reads the first
 * character of the input.
 * @param s The string to parse.
 * @throws XMLException if an I/O error occurs while the first
 *         character is read.
 */
public XMLScanner(String s) throws XMLException {
    this.context = DOCUMENT_START_CONTEXT;
    try {
        this.reader = new StringNormalizingReader(s);
        this.current = nextChar();
    } catch (IOException ioe) {
        throw new XMLException(ioe);
    }
}
0249:
0250: /**
0251: * Implements {@link org.apache.batik.i18n.Localizable#setLocale(Locale)}.
0252: */
0253: public void setLocale(Locale l) {
0254: localizableSupport.setLocale(l);
0255: }
0256:
0257: /**
0258: * Implements {@link org.apache.batik.i18n.Localizable#getLocale()}.
0259: */
0260: public Locale getLocale() {
0261: return localizableSupport.getLocale();
0262: }
0263:
0264: /**
0265: * Implements {@link
0266: * org.apache.batik.i18n.Localizable#formatMessage(String,Object[])}.
0267: */
0268: public String formatMessage(String key, Object[] args)
0269: throws MissingResourceException {
0270: return localizableSupport.formatMessage(key, args);
0271: }
0272:
0273: /**
0274: * Sets the current depth in the XML tree.
0275: */
0276: public void setDepth(int i) {
0277: depth = i;
0278: }
0279:
0280: /**
0281: * Returns the current depth in the XML tree.
0282: */
0283: public int getDepth() {
0284: return depth;
0285: }
0286:
0287: /**
0288: * Sets the current context.
0289: */
0290: public void setContext(int c) {
0291: context = c;
0292: }
0293:
0294: /**
0295: * Returns the current context.
0296: */
0297: public int getContext() {
0298: return context;
0299: }
0300:
0301: /**
0302: * The current lexical unit type like defined in LexicalUnits.
0303: */
0304: public int getType() {
0305: return type;
0306: }
0307:
0308: /**
0309: * Returns the current line.
0310: */
0311: public int getLine() {
0312: return reader.getLine();
0313: }
0314:
0315: /**
0316: * Returns the current column.
0317: */
0318: public int getColumn() {
0319: return reader.getColumn();
0320: }
0321:
0322: /**
0323: * Returns the buffer used to store the chars.
0324: */
0325: public char[] getBuffer() {
0326: return buffer;
0327: }
0328:
0329: /**
0330: * Returns the start offset of the last lexical unit.
0331: */
0332: public int getStart() {
0333: return start;
0334: }
0335:
0336: /**
0337: * Returns the end offset of the last lexical unit.
0338: */
0339: public int getEnd() {
0340: return end;
0341: }
0342:
0343: /**
0344: * Returns the last encountered string delimiter.
0345: */
0346: public char getStringDelimiter() {
0347: return attrDelimiter;
0348: }
0349:
0350: /**
0351: * Returns the start offset of the current lexical unit.
0352: */
0353: public int getStartOffset() {
0354: switch (type) {
0355: case LexicalUnits.SECTION_END:
0356: return -3;
0357:
0358: case LexicalUnits.PI_END:
0359: return -2;
0360:
0361: case LexicalUnits.STRING:
0362: case LexicalUnits.ENTITY_REFERENCE:
0363: case LexicalUnits.PARAMETER_ENTITY_REFERENCE:
0364: case LexicalUnits.START_TAG:
0365: case LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT:
0366: return 1;
0367:
0368: case LexicalUnits.PI_START:
0369: case LexicalUnits.END_TAG:
0370: case LexicalUnits.CHARACTER_REFERENCE:
0371: return 2;
0372:
0373: case LexicalUnits.COMMENT:
0374: return 4;
0375:
0376: default:
0377: return 0;
0378: }
0379: }
0380:
0381: /**
0382: * Returns the end offset of the current lexical unit.
0383: */
0384: public int getEndOffset() {
0385: switch (type) {
0386: case LexicalUnits.STRING:
0387: case LexicalUnits.ENTITY_REFERENCE:
0388: case LexicalUnits.CHARACTER_REFERENCE:
0389: case LexicalUnits.PARAMETER_ENTITY_REFERENCE:
0390: case LexicalUnits.LAST_ATTRIBUTE_FRAGMENT:
0391: return -1;
0392:
0393: case LexicalUnits.PI_DATA:
0394: return -2;
0395:
0396: case LexicalUnits.COMMENT:
0397: return -3;
0398:
0399: case LexicalUnits.CHARACTER_DATA:
0400: if (cdataEndRead) {
0401: return -3;
0402: }
0403: return 0;
0404:
0405: default:
0406: return 0;
0407: }
0408: }
0409:
0410: /**
0411: * Clears the buffer.
0412: */
0413: public void clearBuffer() {
0414: if (position <= 0) {
0415: position = 0;
0416: } else {
0417: buffer[0] = buffer[position - 1];
0418: position = 1;
0419: }
0420: }
0421:
0422: /**
0423: * Advances to the next lexical unit.
0424: * @return The type of the lexical unit like defined in LexicalUnits.
0425: */
0426: public int next() throws XMLException {
0427: return next(context);
0428: }
0429:
0430: /**
0431: * Advances to the next lexical unit.
0432: * @param ctx The context to use for scanning.
0433: * @return The type of the lexical unit like defined in LexicalUnits.
0434: */
0435: public int next(int ctx) throws XMLException {
0436: start = position - 1;
0437: try {
0438: switch (ctx) {
0439: case DOCUMENT_START_CONTEXT:
0440: type = nextInDocumentStart();
0441: break;
0442:
0443: case TOP_LEVEL_CONTEXT:
0444: type = nextInTopLevel();
0445: break;
0446:
0447: case PI_CONTEXT:
0448: type = nextInPI();
0449: break;
0450:
0451: case START_TAG_CONTEXT:
0452: type = nextInStartTag();
0453: break;
0454:
0455: case ATTRIBUTE_VALUE_CONTEXT:
0456: type = nextInAttributeValue();
0457: break;
0458:
0459: case CONTENT_CONTEXT:
0460: type = nextInContent();
0461: break;
0462:
0463: case END_TAG_CONTEXT:
0464: type = nextInEndTag();
0465: break;
0466:
0467: case CDATA_SECTION_CONTEXT:
0468: type = nextInCDATASection();
0469: break;
0470:
0471: case XML_DECL_CONTEXT:
0472: type = nextInXMLDecl();
0473: break;
0474:
0475: case DOCTYPE_CONTEXT:
0476: type = nextInDoctype();
0477: break;
0478:
0479: case DTD_DECLARATIONS_CONTEXT:
0480: type = nextInDTDDeclarations();
0481: break;
0482:
0483: case ELEMENT_DECLARATION_CONTEXT:
0484: type = nextInElementDeclaration();
0485: break;
0486:
0487: case ATTLIST_CONTEXT:
0488: type = nextInAttList();
0489: break;
0490:
0491: case NOTATION_CONTEXT:
0492: type = nextInNotation();
0493: break;
0494:
0495: case ENTITY_CONTEXT:
0496: type = nextInEntity();
0497: break;
0498:
0499: case ENTITY_VALUE_CONTEXT:
0500: return nextInEntityValue();
0501:
0502: case NOTATION_TYPE_CONTEXT:
0503: return nextInNotationType();
0504:
0505: case ENUMERATION_CONTEXT:
0506: return nextInEnumeration();
0507:
0508: default:
0509: throw new IllegalArgumentException("unexpected ctx:"
0510: + ctx);
0511: }
0512: } catch (IOException e) {
0513: throw new XMLException(e);
0514: }
0515: end = position - ((current == -1) ? 0 : 1);
0516: return type;
0517: }
0518:
0519: /**
0520: * Reads the first token in the stream.
0521: */
0522: protected int nextInDocumentStart() throws IOException,
0523: XMLException {
0524: switch (current) {
0525: case 0x9:
0526: case 0xA:
0527: case 0xD:
0528: case 0x20:
0529: do {
0530: nextChar();
0531: } while (current != -1
0532: && XMLUtilities.isXMLSpace((char) current));
0533: context = (depth == 0) ? TOP_LEVEL_CONTEXT
0534: : CONTENT_CONTEXT;
0535: return LexicalUnits.S;
0536:
0537: case '<':
0538: switch (nextChar()) {
0539: case '?':
0540: int c1 = nextChar();
0541: if (c1 == -1
0542: || !XMLUtilities
0543: .isXMLNameFirstCharacter((char) c1)) {
0544: throw createXMLException("invalid.pi.target");
0545: }
0546: context = PI_CONTEXT;
0547: int c2 = nextChar();
0548: if (c2 == -1
0549: || !XMLUtilities.isXMLNameCharacter((char) c2)) {
0550: return LexicalUnits.PI_START;
0551: }
0552: int c3 = nextChar();
0553: if (c3 == -1
0554: || !XMLUtilities.isXMLNameCharacter((char) c3)) {
0555: return LexicalUnits.PI_START;
0556: }
0557: int c4 = nextChar();
0558: if (c4 != -1
0559: && XMLUtilities.isXMLNameCharacter((char) c4)) {
0560: do {
0561: nextChar();
0562: } while (current != -1
0563: && XMLUtilities
0564: .isXMLNameCharacter((char) current));
0565: return LexicalUnits.PI_START;
0566: }
0567: if (c1 == 'x' && c2 == 'm' && c3 == 'l') {
0568: context = XML_DECL_CONTEXT;
0569: return LexicalUnits.XML_DECL_START;
0570: }
0571: if ((c1 == 'x' || c1 == 'X')
0572: && (c2 == 'm' || c2 == 'M')
0573: && (c3 == 'l' || c3 == 'L')) {
0574: throw createXMLException("xml.reserved");
0575: }
0576: return LexicalUnits.PI_START;
0577:
0578: case '!':
0579: switch (nextChar()) {
0580: case '-':
0581: return readComment();
0582:
0583: case 'D':
0584: context = DOCTYPE_CONTEXT;
0585: return readIdentifier("OCTYPE",
0586: LexicalUnits.DOCTYPE_START, -1);
0587:
0588: default:
0589: throw createXMLException("invalid.doctype");
0590: }
0591:
0592: default:
0593: context = START_TAG_CONTEXT;
0594: depth++;
0595: return readName(LexicalUnits.START_TAG);
0596: }
0597:
0598: case -1:
0599: return LexicalUnits.EOF;
0600:
0601: default:
0602: if (depth == 0) {
0603: throw createXMLException("invalid.character");
0604: } else {
0605: return nextInContent();
0606: }
0607: }
0608: }
0609:
0610: /**
0611: * Advances to the next lexical unit in the top level context.
0612: * @return The type of the lexical unit like defined in LexicalUnits.
0613: */
0614: protected int nextInTopLevel() throws IOException, XMLException {
0615: switch (current) {
0616: case 0x9:
0617: case 0xA:
0618: case 0xD:
0619: case 0x20:
0620: do {
0621: nextChar();
0622: } while (current != -1
0623: && XMLUtilities.isXMLSpace((char) current));
0624: return LexicalUnits.S;
0625:
0626: case '<':
0627: switch (nextChar()) {
0628: case '?':
0629: context = PI_CONTEXT;
0630: return readPIStart();
0631:
0632: case '!':
0633: switch (nextChar()) {
0634: case '-':
0635: return readComment();
0636:
0637: case 'D':
0638: context = DOCTYPE_CONTEXT;
0639: return readIdentifier("OCTYPE",
0640: LexicalUnits.DOCTYPE_START, -1);
0641:
0642: default:
0643: throw createXMLException("invalid.character");
0644: }
0645: default:
0646: context = START_TAG_CONTEXT;
0647: depth++;
0648: return readName(LexicalUnits.START_TAG);
0649: }
0650:
0651: case -1:
0652: return LexicalUnits.EOF;
0653:
0654: default:
0655: throw createXMLException("invalid.character");
0656: }
0657: }
0658:
0659: /**
0660: * Returns the next lexical unit in the context of a processing
0661: * instruction.
0662: */
0663: protected int nextInPI() throws IOException, XMLException {
0664: if (piEndRead) {
0665: piEndRead = false;
0666: context = (depth == 0) ? TOP_LEVEL_CONTEXT
0667: : CONTENT_CONTEXT;
0668: return LexicalUnits.PI_END;
0669: }
0670:
0671: switch (current) {
0672: case 0x9:
0673: case 0xA:
0674: case 0xD:
0675: case 0x20:
0676: do {
0677: nextChar();
0678: } while (current != -1
0679: && XMLUtilities.isXMLSpace((char) current));
0680: return LexicalUnits.S;
0681: case '?':
0682: if (nextChar() != '>') {
0683: throw createXMLException("pi.end.expected");
0684: }
0685: nextChar();
0686: if (inDTD) {
0687: context = DTD_DECLARATIONS_CONTEXT;
0688: } else if (depth == 0) {
0689: context = TOP_LEVEL_CONTEXT;
0690: } else {
0691: context = CONTENT_CONTEXT;
0692: }
0693: return LexicalUnits.PI_END;
0694:
0695: default:
0696: do {
0697: do {
0698: nextChar();
0699: } while (current != -1 && current != '?');
0700: nextChar();
0701: } while (current != -1 && current != '>');
0702: nextChar();
0703: piEndRead = true;
0704: return LexicalUnits.PI_DATA;
0705: }
0706: }
0707:
0708: /**
0709: * Returns the next lexical unit in the context of a start tag.
0710: */
0711: protected int nextInStartTag() throws IOException, XMLException {
0712: switch (current) {
0713: case 0x9:
0714: case 0xA:
0715: case 0xD:
0716: case 0x20:
0717: do {
0718: nextChar();
0719: } while (current != -1
0720: && XMLUtilities.isXMLSpace((char) current));
0721: return LexicalUnits.S;
0722:
0723: case '/':
0724: if (nextChar() != '>') {
0725: throw createXMLException("malformed.tag.end");
0726: }
0727: nextChar();
0728: context = (--depth == 0) ? TOP_LEVEL_CONTEXT
0729: : CONTENT_CONTEXT;
0730: return LexicalUnits.EMPTY_ELEMENT_END;
0731:
0732: case '>':
0733: nextChar();
0734: context = CONTENT_CONTEXT;
0735: return LexicalUnits.END_CHAR;
0736:
0737: case '=':
0738: nextChar();
0739: return LexicalUnits.EQ;
0740:
0741: case '"':
0742: attrDelimiter = '"';
0743: nextChar();
0744:
0745: for (;;) {
0746: switch (current) {
0747: case '"':
0748: nextChar();
0749: return LexicalUnits.STRING;
0750:
0751: case '&':
0752: context = ATTRIBUTE_VALUE_CONTEXT;
0753: return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
0754:
0755: case '<':
0756: throw createXMLException("invalid.character");
0757:
0758: case -1:
0759: throw createXMLException("unexpected.eof");
0760: }
0761: nextChar();
0762: }
0763:
0764: case '\'':
0765: attrDelimiter = '\'';
0766: nextChar();
0767:
0768: for (;;) {
0769: switch (current) {
0770: case '\'':
0771: nextChar();
0772: return LexicalUnits.STRING;
0773:
0774: case '&':
0775: context = ATTRIBUTE_VALUE_CONTEXT;
0776: return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
0777:
0778: case '<':
0779: throw createXMLException("invalid.character");
0780:
0781: case -1:
0782: throw createXMLException("unexpected.eof");
0783: }
0784: nextChar();
0785: }
0786:
0787: default:
0788: return readName(LexicalUnits.NAME);
0789: }
0790: }
0791:
0792: /**
0793: * Returns the next lexical unit in the context of an attribute value.
0794: */
0795: protected int nextInAttributeValue() throws IOException,
0796: XMLException {
0797: if (current == -1) {
0798: return LexicalUnits.EOF;
0799: }
0800:
0801: if (current == '&') {
0802: return readReference();
0803:
0804: } else {
0805: loop: for (;;) {
0806: switch (current) {
0807: case '&':
0808: case '<':
0809: case -1:
0810: break loop;
0811: case '"':
0812: case '\'':
0813: if (current == attrDelimiter) {
0814: break loop;
0815: }
0816: }
0817: nextChar();
0818: }
0819:
0820: switch (current) {
0821: case -1:
0822: break;
0823:
0824: case '<':
0825: throw createXMLException("invalid.character");
0826:
0827: case '&':
0828: return LexicalUnits.ATTRIBUTE_FRAGMENT;
0829:
0830: case '\'':
0831: case '"':
0832: nextChar();
0833: if (inDTD) {
0834: context = ATTLIST_CONTEXT;
0835: } else {
0836: context = START_TAG_CONTEXT;
0837: }
0838: }
0839: return LexicalUnits.LAST_ATTRIBUTE_FRAGMENT;
0840: }
0841: }
0842:
0843: /**
0844: * Returns the next lexical unit in the context of an element content.
0845: */
0846: protected int nextInContent() throws IOException, XMLException {
0847: switch (current) {
0848: case -1:
0849: return LexicalUnits.EOF;
0850:
0851: case '&':
0852: return readReference();
0853:
0854: case '<':
0855: switch (nextChar()) {
0856: case '?':
0857: context = PI_CONTEXT;
0858: return readPIStart();
0859:
0860: case '!':
0861: switch (nextChar()) {
0862: case '-':
0863: return readComment();
0864: case '[':
0865: context = CDATA_SECTION_CONTEXT;
0866: return readIdentifier("CDATA[",
0867: LexicalUnits.CDATA_START, -1);
0868: default:
0869: throw createXMLException("invalid.character");
0870: }
0871:
0872: case '/':
0873: nextChar();
0874: context = END_TAG_CONTEXT;
0875: return readName(LexicalUnits.END_TAG);
0876:
0877: default:
0878: depth++;
0879: context = START_TAG_CONTEXT;
0880: return readName(LexicalUnits.START_TAG);
0881: }
0882:
0883: default:
0884: loop: for (;;) {
0885: switch (current) {
0886: default:
0887: nextChar();
0888: break;
0889:
0890: case -1:
0891: case '&':
0892: case '<':
0893: break loop;
0894: }
0895: }
0896: return LexicalUnits.CHARACTER_DATA;
0897: }
0898: }
0899:
0900: /**
0901: * Returns the next lexical unit in the context of a end tag.
0902: */
0903: protected int nextInEndTag() throws IOException, XMLException {
0904: switch (current) {
0905: case 0x9:
0906: case 0xA:
0907: case 0xD:
0908: case 0x20:
0909: do {
0910: nextChar();
0911: } while (current != -1
0912: && XMLUtilities.isXMLSpace((char) current));
0913: return LexicalUnits.S;
0914:
0915: case '>':
0916: if (--depth < 0) {
0917: throw createXMLException("unexpected.end.tag");
0918: } else if (depth == 0) {
0919: context = TOP_LEVEL_CONTEXT;
0920: } else {
0921: context = CONTENT_CONTEXT;
0922: }
0923: nextChar();
0924: return LexicalUnits.END_CHAR;
0925:
0926: default:
0927: throw createXMLException("invalid.character");
0928: }
0929: }
0930:
0931: /**
0932: * Returns the next lexical unit in the context of a CDATA section.
0933: */
0934: protected int nextInCDATASection() throws IOException, XMLException {
0935: if (cdataEndRead) {
0936: cdataEndRead = false;
0937: context = CONTENT_CONTEXT;
0938: return LexicalUnits.SECTION_END;
0939: }
0940:
0941: while (current != -1) {
0942: while (current != ']' && current != -1) {
0943: nextChar();
0944: }
0945: if (current != -1) {
0946: nextChar();
0947: if (current == ']') {
0948: nextChar();
0949: if (current == '>') {
0950: break;
0951: }
0952: }
0953: }
0954: }
0955: if (current == -1) {
0956: throw createXMLException("unexpected.eof");
0957: }
0958: nextChar();
0959: cdataEndRead = true;
0960: return LexicalUnits.CHARACTER_DATA;
0961: }
0962:
0963: /**
0964: * Returns the next lexical unit in the context of an XML declaration.
0965: */
0966: protected int nextInXMLDecl() throws IOException, XMLException {
0967: switch (current) {
0968: case 0x9:
0969: case 0xA:
0970: case 0xD:
0971: case 0x20:
0972: do {
0973: nextChar();
0974: } while (current != -1
0975: && XMLUtilities.isXMLSpace((char) current));
0976: return LexicalUnits.S;
0977: case 'v':
0978: return readIdentifier("ersion",
0979: LexicalUnits.VERSION_IDENTIFIER, -1);
0980: case 'e':
0981: return readIdentifier("ncoding",
0982: LexicalUnits.ENCODING_IDENTIFIER, -1);
0983: case 's':
0984: return readIdentifier("tandalone",
0985: LexicalUnits.STANDALONE_IDENTIFIER, -1);
0986: case '=':
0987: nextChar();
0988: return LexicalUnits.EQ;
0989:
0990: case '?':
0991: nextChar();
0992: if (current != '>') {
0993: throw createXMLException("pi.end.expected");
0994: }
0995: nextChar();
0996: context = TOP_LEVEL_CONTEXT;
0997: return LexicalUnits.PI_END;
0998:
0999: case '"':
1000: attrDelimiter = '"';
1001: return readString();
1002:
1003: case '\'':
1004: attrDelimiter = '\'';
1005: return readString();
1006:
1007: default:
1008: throw createXMLException("invalid.character");
1009: }
1010: }
1011:
1012: /**
1013: * Returns the next lexical unit in the context of a doctype.
1014: */
1015: protected int nextInDoctype() throws IOException, XMLException {
1016: switch (current) {
1017: case 0x9:
1018: case 0xA:
1019: case 0xD:
1020: case 0x20:
1021: do {
1022: nextChar();
1023: } while (current != -1
1024: && XMLUtilities.isXMLSpace((char) current));
1025: return LexicalUnits.S;
1026:
1027: case '>':
1028: nextChar();
1029: context = TOP_LEVEL_CONTEXT;
1030: return LexicalUnits.END_CHAR;
1031:
1032: case 'S':
1033: return readIdentifier("YSTEM",
1034: LexicalUnits.SYSTEM_IDENTIFIER, LexicalUnits.NAME);
1035:
1036: case 'P':
1037: return readIdentifier("UBLIC",
1038: LexicalUnits.PUBLIC_IDENTIFIER, LexicalUnits.NAME);
1039:
1040: case '"':
1041: attrDelimiter = '"';
1042: return readString();
1043:
1044: case '\'':
1045: attrDelimiter = '\'';
1046: return readString();
1047:
1048: case '[':
1049: nextChar();
1050: context = DTD_DECLARATIONS_CONTEXT;
1051: inDTD = true;
1052: return LexicalUnits.LSQUARE_BRACKET;
1053:
1054: default:
1055: return readName(LexicalUnits.NAME);
1056: }
1057: }
1058:
1059: /**
1060: * Returns the next lexical unit in the context dtd declarations.
1061: */
1062: protected int nextInDTDDeclarations() throws IOException,
1063: XMLException {
1064: switch (current) {
1065: case 0x9:
1066: case 0xA:
1067: case 0xD:
1068: case 0x20:
1069: do {
1070: nextChar();
1071: } while (current != -1
1072: && XMLUtilities.isXMLSpace((char) current));
1073: return LexicalUnits.S;
1074:
1075: case ']':
1076: nextChar();
1077: context = DOCTYPE_CONTEXT;
1078: inDTD = false;
1079: return LexicalUnits.RSQUARE_BRACKET;
1080:
1081: case '%':
1082: return readPEReference();
1083:
1084: case '<':
1085: switch (nextChar()) {
1086: case '?':
1087: context = PI_CONTEXT;
1088: return readPIStart();
1089:
1090: case '!':
1091: switch (nextChar()) {
1092: case '-':
1093: return readComment();
1094:
1095: case 'E':
1096: switch (nextChar()) {
1097: case 'L':
1098: context = ELEMENT_DECLARATION_CONTEXT;
1099: return readIdentifier("EMENT",
1100: LexicalUnits.ELEMENT_DECLARATION_START,
1101: -1);
1102: case 'N':
1103: context = ENTITY_CONTEXT;
1104: return readIdentifier("TITY",
1105: LexicalUnits.ENTITY_START, -1);
1106: default:
1107: throw createXMLException("invalid.character");
1108: }
1109:
1110: case 'A':
1111: context = ATTLIST_CONTEXT;
1112: return readIdentifier("TTLIST",
1113: LexicalUnits.ATTLIST_START, -1);
1114: case 'N':
1115: context = NOTATION_CONTEXT;
1116: return readIdentifier("OTATION",
1117: LexicalUnits.NOTATION_START, -1);
1118: default:
1119: throw createXMLException("invalid.character");
1120: }
1121: default:
1122: throw createXMLException("invalid.character");
1123: }
1124: default:
1125: throw createXMLException("invalid.character");
1126: }
1127: }
1128:
1129: /**
1130: * Reads a simple string, like the ones used for version, encoding,
1131: * public/system identifiers...
1132: * The current character must be the string delimiter.
1133: * @return type.
1134: */
1135: protected int readString() throws IOException, XMLException {
1136: do {
1137: nextChar();
1138: } while (current != -1 && current != attrDelimiter);
1139: if (current == -1) {
1140: throw createXMLException("unexpected.eof");
1141: }
1142: nextChar();
1143: return LexicalUnits.STRING;
1144: }
1145:
1146: /**
1147: * Reads a comment. '<!-' must have been read.
1148: */
1149: protected int readComment() throws IOException, XMLException {
1150: if (nextChar() != '-') {
1151: throw createXMLException("malformed.comment");
1152: }
1153: int c = nextChar();
1154: while (c != -1) {
1155: while (c != -1 && c != '-') {
1156: c = nextChar();
1157: }
1158: c = nextChar();
1159: if (c == '-') {
1160: break;
1161: }
1162: }
1163: if (c == -1) {
1164: throw createXMLException("unexpected.eof");
1165: }
1166: c = nextChar();
1167: if (c != '>') {
1168: throw createXMLException("malformed.comment");
1169: }
1170: nextChar();
1171: return LexicalUnits.COMMENT;
1172: }
1173:
1174: /**
1175: * Reads the given identifier.
1176: * @param s The portion of the identifier to read.
1177: * @param type The lexical unit type of the identifier.
1178: * @param ntype The lexical unit type to set if the identifier do not
1179: * match or -1 if an error must be signaled.
1180: */
1181: protected int readIdentifier(String s, int type, int ntype)
1182: throws IOException, XMLException {
1183: int len = s.length();
1184: for (int i = 0; i < len; i++) {
1185: nextChar();
1186: if (current != s.charAt(i)) {
1187: if (ntype == -1) {
1188: throw createXMLException("invalid.character");
1189: } else {
1190: while (current != -1
1191: && XMLUtilities
1192: .isXMLNameCharacter((char) current)) {
1193: nextChar();
1194: }
1195: return ntype;
1196: }
1197: }
1198: }
1199: nextChar();
1200: return type;
1201: }
1202:
1203: /**
1204: * Reads a name. The current character must be the first character.
1205: * @param type The lexical unit type to set.
1206: * @return type.
1207: */
1208: protected int readName(int type) throws IOException, XMLException {
1209: if (current == -1) {
1210: throw createXMLException("unexpected.eof");
1211: }
1212: if (!XMLUtilities.isXMLNameFirstCharacter((char) current)) {
1213: throw createXMLException("invalid.name");
1214: }
1215: do {
1216: nextChar();
1217: } while (current != -1
1218: && XMLUtilities.isXMLNameCharacter((char) current));
1219: return type;
1220: }
1221:
1222: /**
1223: * Reads a processing instruction start.
1224: * @return type.
1225: */
1226: protected int readPIStart() throws IOException, XMLException {
1227: int c1 = nextChar();
1228: if (c1 == -1) {
1229: throw createXMLException("unexpected.eof");
1230: }
1231: if (!XMLUtilities.isXMLNameFirstCharacter((char) current)) {
1232: throw createXMLException("malformed.pi.target");
1233: }
1234: int c2 = nextChar();
1235: if (c2 == -1 || !XMLUtilities.isXMLNameCharacter((char) c2)) {
1236: return LexicalUnits.PI_START;
1237: }
1238: int c3 = nextChar();
1239: if (c3 == -1 || !XMLUtilities.isXMLNameCharacter((char) c3)) {
1240: return LexicalUnits.PI_START;
1241: }
1242: int c4 = nextChar();
1243: if (c4 != -1 && XMLUtilities.isXMLNameCharacter((char) c4)) {
1244: do {
1245: nextChar();
1246: } while (current != -1
1247: && XMLUtilities.isXMLNameCharacter((char) current));
1248: return LexicalUnits.PI_START;
1249: }
1250: if ((c1 == 'x' || c1 == 'X') && (c2 == 'm' || c2 == 'M')
1251: && (c3 == 'l' || c3 == 'L')) {
1252: throw createXMLException("xml.reserved");
1253: }
1254: return LexicalUnits.PI_START;
1255: }
1256:
1257: /**
1258: * Returns the next lexical unit in the context of a element declaration.
1259: */
1260: protected int nextInElementDeclaration() throws IOException,
1261: XMLException {
1262: switch (current) {
1263: case 0x9:
1264: case 0xA:
1265: case 0xD:
1266: case 0x20:
1267: do {
1268: nextChar();
1269: } while (current != -1
1270: && XMLUtilities.isXMLSpace((char) current));
1271: return LexicalUnits.S;
1272:
1273: case '>':
1274: nextChar();
1275: context = DTD_DECLARATIONS_CONTEXT;
1276: return LexicalUnits.END_CHAR;
1277:
1278: case '%':
1279: nextChar();
1280: int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1281: if (current != ';') {
1282: throw createXMLException("malformed.parameter.entity");
1283: }
1284: nextChar();
1285: return t;
1286:
1287: case 'E':
1288: return readIdentifier("MPTY",
1289: LexicalUnits.EMPTY_IDENTIFIER, LexicalUnits.NAME);
1290:
1291: case 'A':
1292: return readIdentifier("NY", LexicalUnits.ANY_IDENTIFIER,
1293: LexicalUnits.NAME);
1294:
1295: case '?':
1296: nextChar();
1297: return LexicalUnits.QUESTION;
1298:
1299: case '+':
1300: nextChar();
1301: return LexicalUnits.PLUS;
1302:
1303: case '*':
1304: nextChar();
1305: return LexicalUnits.STAR;
1306:
1307: case '(':
1308: nextChar();
1309: return LexicalUnits.LEFT_BRACE;
1310:
1311: case ')':
1312: nextChar();
1313: return LexicalUnits.RIGHT_BRACE;
1314:
1315: case '|':
1316: nextChar();
1317: return LexicalUnits.PIPE;
1318:
1319: case ',':
1320: nextChar();
1321: return LexicalUnits.COMMA;
1322:
1323: case '#':
1324: return readIdentifier("PCDATA",
1325: LexicalUnits.PCDATA_IDENTIFIER, -1);
1326:
1327: default:
1328: return readName(LexicalUnits.NAME);
1329: }
1330: }
1331:
1332: /**
1333: * Returns the next lexical unit in the context of an attribute list.
1334: */
1335: protected int nextInAttList() throws IOException, XMLException {
1336: switch (current) {
1337: case 0x9:
1338: case 0xA:
1339: case 0xD:
1340: case 0x20:
1341: do {
1342: nextChar();
1343: } while (current != -1
1344: && XMLUtilities.isXMLSpace((char) current));
1345: return LexicalUnits.S;
1346:
1347: case '>':
1348: nextChar();
1349: context = DTD_DECLARATIONS_CONTEXT;
1350: return type = LexicalUnits.END_CHAR;
1351:
1352: case '%':
1353: int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1354: if (current != ';') {
1355: throw createXMLException("malformed.parameter.entity");
1356: }
1357: nextChar();
1358: return t;
1359:
1360: case 'C':
1361: return readIdentifier("DATA",
1362: LexicalUnits.CDATA_IDENTIFIER, LexicalUnits.NAME);
1363:
1364: case 'I':
1365: nextChar();
1366: if (current != 'D') {
1367: do {
1368: nextChar();
1369: } while (current != -1
1370: && XMLUtilities
1371: .isXMLNameCharacter((char) current));
1372: return LexicalUnits.NAME;
1373: }
1374: nextChar();
1375: if (current == -1
1376: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1377: return LexicalUnits.ID_IDENTIFIER;
1378: }
1379: if (current != 'R') {
1380: do {
1381: nextChar();
1382: } while (current != -1
1383: && XMLUtilities
1384: .isXMLNameCharacter((char) current));
1385: return LexicalUnits.NAME;
1386: }
1387: nextChar();
1388: if (current == -1
1389: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1390: return LexicalUnits.NAME;
1391: }
1392: if (current != 'E') {
1393: do {
1394: nextChar();
1395: } while (current != -1
1396: && XMLUtilities
1397: .isXMLNameCharacter((char) current));
1398: return LexicalUnits.NAME;
1399: }
1400: nextChar();
1401: if (current == -1
1402: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1403: return LexicalUnits.NAME;
1404: }
1405: if (current != 'F') {
1406: do {
1407: nextChar();
1408: } while (current != -1
1409: && XMLUtilities
1410: .isXMLNameCharacter((char) current));
1411: return LexicalUnits.NAME;
1412: }
1413: nextChar();
1414: if (current == -1
1415: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1416: return LexicalUnits.IDREF_IDENTIFIER;
1417: }
1418: if (current != 'S') {
1419: do {
1420: nextChar();
1421: } while (current != -1
1422: && XMLUtilities
1423: .isXMLNameCharacter((char) current));
1424: return LexicalUnits.NAME;
1425: }
1426: nextChar();
1427: if (current == -1
1428: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1429: return LexicalUnits.IDREFS_IDENTIFIER;
1430: }
1431: do {
1432: nextChar();
1433: } while (current != -1
1434: && XMLUtilities.isXMLNameCharacter((char) current));
1435: return type = LexicalUnits.NAME;
1436:
1437: case 'N':
1438: switch (nextChar()) {
1439: default:
1440: do {
1441: nextChar();
1442: } while (current != -1
1443: && XMLUtilities
1444: .isXMLNameCharacter((char) current));
1445: return LexicalUnits.NAME;
1446:
1447: case 'O':
1448: context = NOTATION_TYPE_CONTEXT;
1449: return readIdentifier("TATION",
1450: LexicalUnits.NOTATION_IDENTIFIER,
1451: LexicalUnits.NAME);
1452:
1453: case 'M':
1454: nextChar();
1455: if (current == -1
1456: || !XMLUtilities
1457: .isXMLNameCharacter((char) current)) {
1458: return LexicalUnits.NAME;
1459: }
1460: if (current != 'T') {
1461: do {
1462: nextChar();
1463: } while (current != -1
1464: && XMLUtilities
1465: .isXMLNameCharacter((char) current));
1466: return LexicalUnits.NAME;
1467: }
1468: nextChar();
1469: if (current == -1
1470: || !XMLUtilities
1471: .isXMLNameCharacter((char) current)) {
1472: return LexicalUnits.NAME;
1473: }
1474: if (current != 'O') {
1475: do {
1476: nextChar();
1477: } while (current != -1
1478: && XMLUtilities
1479: .isXMLNameCharacter((char) current));
1480: return LexicalUnits.NAME;
1481: }
1482: nextChar();
1483: if (current == -1
1484: || !XMLUtilities
1485: .isXMLNameCharacter((char) current)) {
1486: return LexicalUnits.NAME;
1487: }
1488: if (current != 'K') {
1489: do {
1490: nextChar();
1491: } while (current != -1
1492: && XMLUtilities
1493: .isXMLNameCharacter((char) current));
1494: return LexicalUnits.NAME;
1495: }
1496: nextChar();
1497: if (current == -1
1498: || !XMLUtilities
1499: .isXMLNameCharacter((char) current)) {
1500: return LexicalUnits.NAME;
1501: }
1502: if (current != 'E') {
1503: do {
1504: nextChar();
1505: } while (current != -1
1506: && XMLUtilities
1507: .isXMLNameCharacter((char) current));
1508: return LexicalUnits.NAME;
1509: }
1510: nextChar();
1511: if (current == -1
1512: || !XMLUtilities
1513: .isXMLNameCharacter((char) current)) {
1514: return LexicalUnits.NAME;
1515: }
1516: if (current != 'N') {
1517: do {
1518: nextChar();
1519: } while (current != -1
1520: && XMLUtilities
1521: .isXMLNameCharacter((char) current));
1522: return LexicalUnits.NAME;
1523: }
1524: nextChar();
1525: if (current == -1
1526: || !XMLUtilities
1527: .isXMLNameCharacter((char) current)) {
1528: return LexicalUnits.NMTOKEN_IDENTIFIER;
1529: }
1530: if (current != 'S') {
1531: do {
1532: nextChar();
1533: } while (current != -1
1534: && XMLUtilities
1535: .isXMLNameCharacter((char) current));
1536: return LexicalUnits.NAME;
1537: }
1538: nextChar();
1539: if (current == -1
1540: || !XMLUtilities
1541: .isXMLNameCharacter((char) current)) {
1542: return LexicalUnits.NMTOKENS_IDENTIFIER;
1543: }
1544: do {
1545: nextChar();
1546: } while (current != -1
1547: && XMLUtilities
1548: .isXMLNameCharacter((char) current));
1549: return LexicalUnits.NAME;
1550: }
1551:
1552: case 'E':
1553: nextChar();
1554: if (current != 'N') {
1555: do {
1556: nextChar();
1557: } while (current != -1
1558: && XMLUtilities
1559: .isXMLNameCharacter((char) current));
1560: return LexicalUnits.NAME;
1561: }
1562: nextChar();
1563: if (current == -1
1564: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1565: return LexicalUnits.NAME;
1566: }
1567: if (current != 'T') {
1568: do {
1569: nextChar();
1570: } while (current != -1
1571: && XMLUtilities
1572: .isXMLNameCharacter((char) current));
1573: return LexicalUnits.NAME;
1574: }
1575: nextChar();
1576: if (current == -1
1577: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1578: return LexicalUnits.NAME;
1579: }
1580: if (current != 'I') {
1581: do {
1582: nextChar();
1583: } while (current != -1
1584: && XMLUtilities
1585: .isXMLNameCharacter((char) current));
1586: return LexicalUnits.NAME;
1587: }
1588: nextChar();
1589: if (current == -1
1590: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1591: return LexicalUnits.NAME;
1592: }
1593: if (current != 'T') {
1594: do {
1595: nextChar();
1596: } while (current != -1
1597: && XMLUtilities
1598: .isXMLNameCharacter((char) current));
1599: return type = LexicalUnits.NAME;
1600: }
1601: nextChar();
1602: if (current == -1
1603: || !XMLUtilities.isXMLNameCharacter((char) current)) {
1604: return LexicalUnits.NAME;
1605: }
1606: switch (current) {
1607: case 'Y':
1608: nextChar();
1609: if (current == -1
1610: || !XMLUtilities
1611: .isXMLNameCharacter((char) current)) {
1612: return LexicalUnits.ENTITY_IDENTIFIER;
1613: }
1614: do {
1615: nextChar();
1616: } while (current != -1
1617: && XMLUtilities
1618: .isXMLNameCharacter((char) current));
1619: return LexicalUnits.NAME;
1620: case 'I':
1621: nextChar();
1622: if (current == -1
1623: || !XMLUtilities
1624: .isXMLNameCharacter((char) current)) {
1625: return LexicalUnits.NAME;
1626: }
1627: if (current != 'E') {
1628: do {
1629: nextChar();
1630: } while (current != -1
1631: && XMLUtilities
1632: .isXMLNameCharacter((char) current));
1633: return LexicalUnits.NAME;
1634: }
1635: nextChar();
1636: if (current == -1
1637: || !XMLUtilities
1638: .isXMLNameCharacter((char) current)) {
1639: return LexicalUnits.NAME;
1640: }
1641: if (current != 'S') {
1642: do {
1643: nextChar();
1644: } while (current != -1
1645: && XMLUtilities
1646: .isXMLNameCharacter((char) current));
1647: return LexicalUnits.NAME;
1648: }
1649: return LexicalUnits.ENTITIES_IDENTIFIER;
1650:
1651: default:
1652: if (current == -1
1653: || !XMLUtilities
1654: .isXMLNameCharacter((char) current)) {
1655: return LexicalUnits.NAME;
1656: }
1657: do {
1658: nextChar();
1659: } while (current != -1
1660: && XMLUtilities
1661: .isXMLNameCharacter((char) current));
1662: return LexicalUnits.NAME;
1663: }
1664:
1665: case '"':
1666: attrDelimiter = '"';
1667: nextChar();
1668: if (current == -1) {
1669: throw createXMLException("unexpected.eof");
1670: }
1671: if (current != '"' && current != '&') {
1672: do {
1673: nextChar();
1674: } while (current != -1 && current != '"'
1675: && current != '&');
1676: }
1677: switch (current) {
1678: case '&':
1679: context = ATTRIBUTE_VALUE_CONTEXT;
1680: return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1681:
1682: case '"':
1683: nextChar();
1684: return LexicalUnits.STRING;
1685:
1686: default:
1687: throw createXMLException("invalid.character");
1688: }
1689:
1690: case '\'':
1691: attrDelimiter = '\'';
1692: nextChar();
1693: if (current == -1) {
1694: throw createXMLException("unexpected.eof");
1695: }
1696: if (current != '\'' && current != '&') {
1697: do {
1698: nextChar();
1699: } while (current != -1 && current != '\''
1700: && current != '&');
1701: }
1702: switch (current) {
1703: case '&':
1704: context = ATTRIBUTE_VALUE_CONTEXT;
1705: return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1706:
1707: case '\'':
1708: nextChar();
1709: return LexicalUnits.STRING;
1710:
1711: default:
1712: throw createXMLException("invalid.character");
1713: }
1714:
1715: case '#':
1716: switch (nextChar()) {
1717: case 'R':
1718: return readIdentifier("EQUIRED",
1719: LexicalUnits.REQUIRED_IDENTIFIER, -1);
1720:
1721: case 'I':
1722: return readIdentifier("MPLIED",
1723: LexicalUnits.IMPLIED_IDENTIFIER, -1);
1724: case 'F':
1725: return readIdentifier("IXED",
1726: LexicalUnits.FIXED_IDENTIFIER, -1);
1727: default:
1728: throw createXMLException("invalid.character");
1729: }
1730:
1731: case '(':
1732: nextChar();
1733: context = ENUMERATION_CONTEXT;
1734: return LexicalUnits.LEFT_BRACE;
1735:
1736: default:
1737: return readName(LexicalUnits.NAME);
1738: }
1739: }
1740:
1741: /**
1742: * Returns the next lexical unit in the context of a notation.
1743: */
1744: protected int nextInNotation() throws IOException, XMLException {
1745: switch (current) {
1746: case 0x9:
1747: case 0xA:
1748: case 0xD:
1749: case 0x20:
1750: do {
1751: nextChar();
1752: } while (current != -1
1753: && XMLUtilities.isXMLSpace((char) current));
1754: return LexicalUnits.S;
1755:
1756: case '>':
1757: nextChar();
1758: context = DTD_DECLARATIONS_CONTEXT;
1759: return LexicalUnits.END_CHAR;
1760:
1761: case '%':
1762: int t = readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1763: if (current != ';') {
1764: throw createXMLException("malformed.parameter.entity");
1765: }
1766: nextChar();
1767: return t;
1768: case 'S':
1769: return readIdentifier("YSTEM",
1770: LexicalUnits.SYSTEM_IDENTIFIER, LexicalUnits.NAME);
1771:
1772: case 'P':
1773: return readIdentifier("UBLIC",
1774: LexicalUnits.PUBLIC_IDENTIFIER, LexicalUnits.NAME);
1775:
1776: case '"':
1777: attrDelimiter = '"';
1778: return readString();
1779:
1780: case '\'':
1781: attrDelimiter = '\'';
1782: return readString();
1783:
1784: default:
1785: return readName(LexicalUnits.NAME);
1786: }
1787: }
1788:
1789: /**
1790: * Returns the next lexical unit in the context of an entity.
1791: */
1792: protected int nextInEntity() throws IOException, XMLException {
1793: switch (current) {
1794: case 0x9:
1795: case 0xA:
1796: case 0xD:
1797: case 0x20:
1798: do {
1799: nextChar();
1800: } while (current != -1
1801: && XMLUtilities.isXMLSpace((char) current));
1802: return LexicalUnits.S;
1803:
1804: case '>':
1805: nextChar();
1806: context = DTD_DECLARATIONS_CONTEXT;
1807: return LexicalUnits.END_CHAR;
1808:
1809: case '%':
1810: nextChar();
1811: return LexicalUnits.PERCENT;
1812:
1813: case 'S':
1814: return readIdentifier("YSTEM",
1815: LexicalUnits.SYSTEM_IDENTIFIER, LexicalUnits.NAME);
1816:
1817: case 'P':
1818: return readIdentifier("UBLIC",
1819: LexicalUnits.PUBLIC_IDENTIFIER, LexicalUnits.NAME);
1820:
1821: case 'N':
1822: return readIdentifier("DATA",
1823: LexicalUnits.NDATA_IDENTIFIER, LexicalUnits.NAME);
1824:
1825: case '"':
1826: attrDelimiter = '"';
1827: nextChar();
1828: if (current == -1) {
1829: throw createXMLException("unexpected.eof");
1830: }
1831:
1832: if (current != '"' && current != '&' && current != '%') {
1833: do {
1834: nextChar();
1835: } while (current != -1 && current != '"'
1836: && current != '&' && current != '%');
1837: }
1838: switch (current) {
1839: default:
1840: throw createXMLException("invalid.character");
1841:
1842: case '&':
1843: case '%':
1844: context = ENTITY_VALUE_CONTEXT;
1845: break;
1846:
1847: case '"':
1848: nextChar();
1849: return LexicalUnits.STRING;
1850: }
1851: return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1852:
1853: case '\'':
1854: attrDelimiter = '\'';
1855: nextChar();
1856: if (current == -1) {
1857: throw createXMLException("unexpected.eof");
1858: }
1859:
1860: if (current != '\'' && current != '&' && current != '%') {
1861: do {
1862: nextChar();
1863: } while (current != -1 && current != '\''
1864: && current != '&' && current != '%');
1865: }
1866: switch (current) {
1867: default:
1868: throw createXMLException("invalid.character");
1869:
1870: case '&':
1871: case '%':
1872: context = ENTITY_VALUE_CONTEXT;
1873: break;
1874:
1875: case '\'':
1876: nextChar();
1877: return LexicalUnits.STRING;
1878: }
1879: return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1880:
1881: default:
1882: return readName(LexicalUnits.NAME);
1883: }
1884: }
1885:
1886: /**
1887: * Returns the next lexical unit in the context of an entity value.
1888: */
1889: protected int nextInEntityValue() throws IOException, XMLException {
1890: switch (current) {
1891: case '&':
1892: return readReference();
1893:
1894: case '%':
1895: int t = nextChar();
1896: readName(LexicalUnits.PARAMETER_ENTITY_REFERENCE);
1897: if (current != ';') {
1898: throw createXMLException("invalid.parameter.entity");
1899: }
1900: nextChar();
1901: return t;
1902:
1903: default:
1904: while (current != -1 && current != attrDelimiter
1905: && current != '&' && current != '%') {
1906: nextChar();
1907: }
1908: switch (current) {
1909: case -1:
1910: throw createXMLException("unexpected.eof");
1911:
1912: case '\'':
1913: case '"':
1914: nextChar();
1915: context = ENTITY_CONTEXT;
1916: return LexicalUnits.STRING;
1917: }
1918: return LexicalUnits.FIRST_ATTRIBUTE_FRAGMENT;
1919: }
1920: }
1921:
1922: /**
1923: * Returns the next lexical unit in the context of a notation type.
1924: */
1925: protected int nextInNotationType() throws IOException, XMLException {
1926: switch (current) {
1927: case 0x9:
1928: case 0xA:
1929: case 0xD:
1930: case 0x20:
1931: do {
1932: nextChar();
1933: } while (current != -1
1934: && XMLUtilities.isXMLSpace((char) current));
1935: return LexicalUnits.S;
1936:
1937: case '|':
1938: nextChar();
1939: return LexicalUnits.PIPE;
1940:
1941: case '(':
1942: nextChar();
1943: return LexicalUnits.LEFT_BRACE;
1944:
1945: case ')':
1946: nextChar();
1947: context = ATTLIST_CONTEXT;
1948: return LexicalUnits.RIGHT_BRACE;
1949:
1950: default:
1951: return readName(LexicalUnits.NAME);
1952: }
1953: }
1954:
1955: /**
1956: * Returns the next lexical unit in the context of an enumeration.
1957: */
1958: protected int nextInEnumeration() throws IOException, XMLException {
1959: switch (current) {
1960: case 0x9:
1961: case 0xA:
1962: case 0xD:
1963: case 0x20:
1964: do {
1965: nextChar();
1966: } while (current != -1
1967: && XMLUtilities.isXMLSpace((char) current));
1968: return LexicalUnits.S;
1969:
1970: case '|':
1971: nextChar();
1972: return LexicalUnits.PIPE;
1973:
1974: case ')':
1975: nextChar();
1976: context = ATTLIST_CONTEXT;
1977: return LexicalUnits.RIGHT_BRACE;
1978:
1979: default:
1980: return readNmtoken();
1981: }
1982: }
1983:
1984: /**
1985: * Reads an entity or character reference. The current character
1986: * must be '&'.
1987: * @return type.
1988: */
1989: protected int readReference() throws IOException, XMLException {
1990: nextChar();
1991: if (current == '#') {
1992: nextChar();
1993: int i = 0;
1994: switch (current) {
1995: case 'x':
1996: do {
1997: i++;
1998: nextChar();
1999: } while ((current >= '0' && current <= '9')
2000: || (current >= 'a' && current <= 'f')
2001: || (current >= 'A' && current <= 'F'));
2002: break;
2003:
2004: default:
2005: do {
2006: i++;
2007: nextChar();
2008: } while (current >= '0' && current <= '9');
2009: break;
2010:
2011: case -1:
2012: throw createXMLException("unexpected.eof");
2013: }
2014: if (i == 1 || current != ';') {
2015: throw createXMLException("character.reference");
2016: }
2017: nextChar();
2018: return LexicalUnits.CHARACTER_REFERENCE;
2019: } else {
2020: int t = readName(LexicalUnits.ENTITY_REFERENCE);
2021: if (current != ';') {
2022: throw createXMLException("character.reference");
2023: }
2024: nextChar();
2025: return t;
2026: }
2027: }
2028:
2029: /**
2030: * Reads a parameter entity reference. The current character must be '%'.
2031: * @return type.
2032: */
2033: protected int readPEReference() throws IOException, XMLException {
2034: nextChar();
2035: if (current == -1) {
2036: throw createXMLException("unexpected.eof");
2037: }
2038: if (!XMLUtilities.isXMLNameFirstCharacter((char) current)) {
2039: throw createXMLException("invalid.parameter.entity");
2040: }
2041: do {
2042: nextChar();
2043: } while (current != -1
2044: && XMLUtilities.isXMLNameCharacter((char) current));
2045: if (current != ';') {
2046: throw createXMLException("invalid.parameter.entity");
2047: }
2048: nextChar();
2049: return LexicalUnits.PARAMETER_ENTITY_REFERENCE;
2050: }
2051:
2052: /**
2053: * Reads a Nmtoken. The current character must be the first character.
2054: * @return LexicalUnits.NMTOKEN.
2055: */
2056: protected int readNmtoken() throws IOException, XMLException {
2057: if (current == -1) {
2058: throw createXMLException("unexpected.eof");
2059: }
2060: while (XMLUtilities.isXMLNameCharacter((char) current)) {
2061: nextChar();
2062: }
2063: return LexicalUnits.NMTOKEN;
2064: }
2065:
2066: /**
2067: * Sets the value of the current char to the next character or -1 if the
2068: * end of stream has been reached.
2069: */
2070: protected int nextChar() throws IOException {
2071: current = reader.read();
2072:
2073: if (current == -1) {
2074: return current;
2075: }
2076:
2077: if (position == buffer.length) {
2078: char[] t = new char[1 + position + position / 2];
2079: System.arraycopy(buffer, 0, t, 0, position);
2080: buffer = t;
2081: }
2082:
2083: return buffer[position++] = (char) current;
2084: }
2085:
2086: /**
2087: * Returns an XMLException initialized with the given message key.
2088: */
2089: protected XMLException createXMLException(String message) {
2090: String m;
2091: try {
2092: m = formatMessage(message, new Object[] {
2093: new Integer(reader.getLine()),
2094: new Integer(reader.getColumn()) });
2095: } catch (MissingResourceException e) {
2096: m = message;
2097: }
2098: return new XMLException(m);
2099: }
2100:
2101: }
|