0001: /*
0002: *
0003: *
0004: * Copyright 1990-2007 Sun Microsystems, Inc. All Rights Reserved.
0005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
0006: *
0007: * This program is free software; you can redistribute it and/or
0008: * modify it under the terms of the GNU General Public License version
0009: * 2 only, as published by the Free Software Foundation.
0010: *
0011: * This program is distributed in the hope that it will be useful, but
0012: * WITHOUT ANY WARRANTY; without even the implied warranty of
0013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0014: * General Public License version 2 for more details (a copy is
0015: * included at /legal/license.txt).
0016: *
0017: * You should have received a copy of the GNU General Public License
0018: * version 2 along with this work; if not, write to the Free Software
0019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
0020: * 02110-1301 USA
0021: *
0022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
0023: * Clara, CA 95054 or visit www.sun.com if you need additional
0024: * information or have any questions.
0025: */
0026:
0027: package com.sun.ukit.jaxp;
0028:
0029: import java.util.Hashtable;
0030: import java.io.InputStream;
0031: import java.io.Reader;
0032: import java.io.InputStreamReader;
0033: import java.io.IOException;
0034: import java.io.UnsupportedEncodingException;
0035:
0036: import org.xml.sax.helpers.DefaultHandler;
0037: import org.xml.sax.Locator;
0038: import org.xml.sax.InputSource;
0039: import org.xml.sax.Attributes;
0040: import org.xml.sax.SAXParseException;
0041: import org.xml.sax.SAXException;
0042:
0043: import javax.xml.parsers.SAXParser;
0044:
0045: /**
0046: * XML non-validating parser.
0047: *
0048: * This non-validating parser conforms to <a href="http://www.w3.org/TR/REC-xml"
0049: * >Extensible Markup Language (XML) 1.0</a> and <a href="http://www.w3.org/TR/REC-xml-names"
0050: * >"Namespaces in XML"</a> specifications.
0051: * The API used by the parser is <a href="http://www.jcp.org/en/jsr/detail?id=172"
0052: * >JSR-172</a> subset of <a href="http://java.sun.com/xml/jaxp/index.html">JAXP</a>
0053: * and <a href="http://www.saxproject.org/">SAX2</a>.
0054: *
0055: * @see org.xml.sax.helpers.DefaultHandler
0056: */
0057:
0058: public final class Parser extends SAXParser implements Locator {
0059: public final static String FAULT = "";
0060:
0061: private final static int BUFFSIZE_READER = 512;
0062: private final static int BUFFSIZE_PARSER = 128;
0063:
0064: /** The end of stream character. */
0065: public final static char EOS = 0xffff;
0066:
0067: private Pair mNoNS; // there is no namespace
0068: private Pair mXml; // the xml namespace
0069:
0070: private DefaultHandler mHand; // a document handler
0071: private Hashtable mEnt; // the entities look up table
0072: private Hashtable mPEnt; // the parmeter entities look up table
0073:
0074: private boolean mIsSAlone; // xml decl standalone flag
0075: private boolean mIsNSAware; // if true - to report QName
0076:
0077: private short mSt; // global state of the parser
0078: // mSt values:
0079: // - 0 : the begining of the document
0080: // - 1 : misc before DTD
0081: // - 2 : DTD
0082: // - 3 : misc after DTD
0083: // - 4 : document's element
0084: // - 5 : misc after document's element
0085:
0086: private char mESt; // built-in entity recognizer state
0087: // mESt values:
0088: // 0x100 : the initial state
0089: // > 0x100 : unrecognized name
0090: // < 0x100 : replacement character
0091:
0092: private char[] mBuff; // parser buffer
0093: private int mBuffIdx; // index of the last char
0094:
0095: private Pair mPref; // stack of prefixes
0096: private Pair mElm; // stack of elements
0097:
0098: private Pair mAttL; // list of defined attributes by element name
0099:
0100: private Input mInp; // stack of entities
0101: private Input mDoc; // document entity
0102:
0103: private char[] mChars; // reading buffer
0104: private int mChLen; // current capacity
0105: private int mChIdx; // index to the next char
0106:
0107: private Attrs mAttrs; // attributes of the curr. element
0108: private String[] mItems; // attributes array of the curr. element
0109: private char mAttrIdx; // attributes counter/index
0110:
0111: private Pair mDltd; // deleted objects for reuse
0112:
0113: /**
0114: * Default prefixes
0115: */
0116: private final static char NONS[];
0117: private final static char XML[];
0118: private final static char XMLNS[];
0119: static {
0120: NONS = new char[1];
0121: NONS[0] = (char) 0;
0122:
0123: XML = new char[4];
0124: XML[0] = (char) 4;
0125: XML[1] = 'x';
0126: XML[2] = 'm';
0127: XML[3] = 'l';
0128:
0129: XMLNS = new char[6];
0130: XMLNS[0] = (char) 6;
0131: XMLNS[1] = 'x';
0132: XMLNS[2] = 'm';
0133: XMLNS[3] = 'l';
0134: XMLNS[4] = 'n';
0135: XMLNS[5] = 's';
0136: }
0137:
0138: /**
0139: * ASCII character type array.
0140: *
0141: * This array maps an ASCII (7 bit) character to the character type.<br />
0142: * Possible character type values are:<br />
0143: * - ' ' for any kind of white space character;<br />
0144: * - 'a' for any lower case alphabetical character value;<br />
0145: * - 'A' for any upper case alphabetical character value;<br />
0146: * - 'd' for any decimal digit character value;<br />
0147: * - 'z' for any character less then ' ' except '\t', '\n', '\r';<br />
0148: * An ASCII (7 bit) character which does not fall in any category listed
0149: * above is mapped to it self.
0150: */
0151: private static final byte asctyp[];
0152:
0153: /**
0154: * NMTOKEN character type array.
0155: *
0156: * This array maps an ASCII (7 bit) character to the character type.<br />
0157: * Possible character type values are:<br />
0158: * - 0 for underscore ('_') or any lower and upper case alphabetical character value;<br />
0159: * - 1 for colon (':') character;<br />
0160: * - 2 for dash ('-') and dot ('.') or any decimal digit character value;<br />
0161: * - 3 for any kind of white space character<br />
0162: * An ASCII (7 bit) character which does not fall in any category listed
0163: * above is mapped to 0xff.
0164: */
0165: private static final byte nmttyp[];
0166:
0167: /**
0168: * Static constructor.
0169: *
0170: * Sets up the ASCII character type array which is used by
0171: * {@link #asctyp asctyp} method and NMTOKEN character type array.
0172: */
0173: static {
0174: short i = 0;
0175:
0176: asctyp = new byte[0x80];
0177: while (i < ' ')
0178: asctyp[i++] = (byte) 'z';
0179: asctyp['\t'] = (byte) ' ';
0180: asctyp['\r'] = (byte) ' ';
0181: asctyp['\n'] = (byte) ' ';
0182: while (i < '0')
0183: asctyp[i] = (byte) i++;
0184: while (i <= '9')
0185: asctyp[i++] = (byte) 'd';
0186: while (i < 'A')
0187: asctyp[i] = (byte) i++;
0188: while (i <= 'Z')
0189: asctyp[i++] = (byte) 'A';
0190: while (i < 'a')
0191: asctyp[i] = (byte) i++;
0192: while (i <= 'z')
0193: asctyp[i++] = (byte) 'a';
0194: while (i < 0x80)
0195: asctyp[i] = (byte) i++;
0196:
0197: nmttyp = new byte[0x80];
0198: for (i = 0; i < '0'; i++)
0199: nmttyp[i] = (byte) 0xff;
0200: while (i <= '9')
0201: nmttyp[i++] = (byte) 2; // digits
0202: while (i < 'A')
0203: nmttyp[i++] = (byte) 0xff;
0204: // skiped upper case alphabetical character are already 0
0205: for (i = '['; i < 'a'; i++)
0206: nmttyp[i] = (byte) 0xff;
0207: // skiped lower case alphabetical character are already 0
0208: for (i = '{'; i < 0x80; i++)
0209: nmttyp[i] = (byte) 0xff;
0210: nmttyp['_'] = 0;
0211: nmttyp[':'] = 1;
0212: nmttyp['.'] = 2;
0213: nmttyp['-'] = 2;
0214: nmttyp[' '] = 3;
0215: nmttyp['\t'] = 3;
0216: nmttyp['\r'] = 3;
0217: nmttyp['\n'] = 3;
0218: }
0219:
0220: /**
0221: * Constructor.
0222: */
0223: public Parser(boolean nsaware) {
0224: super ();
0225: mIsNSAware = nsaware;
0226:
0227: // Initialize the parser
0228: mBuff = new char[BUFFSIZE_PARSER];
0229: mAttrs = new Attrs();
0230:
0231: // Default namespace
0232: mPref = pair(mPref);
0233: mPref.name = "";
0234: mPref.value = "";
0235: mPref.chars = NONS;
0236: mNoNS = mPref; // no namespace
0237: // XML namespace
0238: mPref = pair(mPref);
0239: mPref.name = "xml";
0240: mPref.value = "http://www.w3.org/XML/1998/namespace";
0241: mPref.chars = XML;
0242: mXml = mPref; // XML namespace
0243: }
0244:
0245: /**
0246: * Return the public identifier for the current document event.
0247: *
0248: * <p>The return value is the public identifier of the document
0249: * entity or of the external parsed entity in which the markup
0250: * triggering the event appears.</p>
0251: *
0252: * @return A string containing the public identifier, or
0253: * null if none is available.
0254: *
0255: * @see #getSystemId
0256: */
0257: public String getPublicId() {
0258: return (mInp != null) ? mInp.pubid : null;
0259: }
0260:
0261: /**
0262: * Return the system identifier for the current document event.
0263: *
0264: * <p>The return value is the system identifier of the document
0265: * entity or of the external parsed entity in which the markup
0266: * triggering the event appears.</p>
0267: *
0268: * <p>If the system identifier is a URL, the parser must resolve it
0269: * fully before passing it to the application.</p>
0270: *
0271: * @return A string containing the system identifier, or null
0272: * if none is available.
0273: *
0274: * @see #getPublicId
0275: */
0276: public String getSystemId() {
0277: return (mInp != null) ? mInp.sysid : null;
0278: }
0279:
0280: /**
0281: * Return the line number where the current document event ends.
0282: *
0283: * @return Always returns -1 indicating the line number is not
0284: * available.
0285: *
0286: * @see #getColumnNumber
0287: */
0288: public int getLineNumber() {
0289: return -1;
0290: }
0291:
0292: /**
0293: * Return the column number where the current document event ends.
0294: *
0295: * @return Always returns -1 indicating the column number is not
0296: * available.
0297: *
0298: * @see #getLineNumber
0299: */
0300: public int getColumnNumber() {
0301: return -1;
0302: }
0303:
0304: /**
0305: * Indicates whether or not this parser is configured to
0306: * understand namespaces.
0307: *
0308: * @return true if this parser is configured to
0309: * understand namespaces; false otherwise.
0310: */
0311: public boolean isNamespaceAware() {
0312: return mIsNSAware;
0313: }
0314:
0315: /**
0316: * Indicates whether or not this parser is configured to validate
0317: * XML documents.
0318: *
0319: * @return true if this parser is configured to validate XML
0320: * documents; false otherwise.
0321: */
0322: public boolean isValidating() {
0323: return false;
0324: }
0325:
0326: /**
0327: * Parse the content of the given {@link java.io.InputStream}
0328: * instance as XML using the specified
0329: * {@link org.xml.sax.helpers.DefaultHandler}.
0330: *
0331: * @param src InputStream containing the content to be parsed.
0332: * @param handler The SAX DefaultHandler to use.
0333: * @exception IOException If any IO errors occur.
0334: * @exception IllegalArgumentException If the given InputStream or handler is null.
0335: * @exception SAXException If the underlying parser throws a
0336: * SAXException while parsing.
0337: * @see org.xml.sax.helpers.DefaultHandler
0338: */
0339: public void parse(InputStream src, DefaultHandler handler)
0340: throws SAXException, IOException {
0341: if ((src == null) || (handler == null))
0342: throw new IllegalArgumentException("");
0343: parse(new InputSource(src), handler);
0344: }
0345:
0346: /**
0347: * Parse the content given {@link org.xml.sax.InputSource}
0348: * as XML using the specified
0349: * {@link org.xml.sax.helpers.DefaultHandler}.
0350: *
0351: * @param is The InputSource containing the content to be parsed.
0352: * @param handler The SAX DefaultHandler to use.
0353: * @exception IOException If any IO errors occur.
0354: * @exception IllegalArgumentException If the InputSource or handler is null.
0355: * @exception SAXException If the underlying parser throws a
0356: * SAXException while parsing.
0357: * @see org.xml.sax.helpers.DefaultHandler
0358: */
0359: public void parse(InputSource is, DefaultHandler handler)
0360: throws SAXException, IOException {
0361: if ((is == null) || (handler == null))
0362: throw new IllegalArgumentException("");
0363: // Set up the handler
0364: mHand = handler;
0365: // Set up the document
0366: mInp = new Input(BUFFSIZE_READER);
0367: setinp(is);
0368: parse(handler);
0369: }
0370:
0371: /**
0372: * Parse the XML document content using the specified
0373: * {@link org.xml.sax.helpers.DefaultHandler}.
0374: *
0375: * @param handler The SAX DefaultHandler to use.
0376: * @exception IOException If any IO errors occur.
0377: * @exception SAXException If the underlying parser throws a
0378: * SAXException while parsing.
0379: * @see org.xml.sax.helpers.DefaultHandler
0380: */
0381: private void parse(DefaultHandler handler) throws SAXException,
0382: IOException {
0383: try {
0384: // Initialize the parser
0385: mPEnt = new Hashtable();
0386: mEnt = new Hashtable();
0387: mDoc = mInp; // current input is document entity
0388: mChars = mInp.chars; // use document entity buffer
0389: // Parse an xml document
0390: char ch;
0391: mHand.setDocumentLocator(this );
0392: mHand.startDocument();
0393: mSt = 1;
0394: while ((ch = next()) != EOS) {
0395: switch (chtyp(ch)) {
0396: case '<':
0397: ch = next();
0398: switch (ch) {
0399: case '?':
0400: pi();
0401: break;
0402:
0403: case '!':
0404: ch = next();
0405: back();
0406: if (ch == '-')
0407: comm();
0408: else
0409: dtd();
0410: break;
0411:
0412: default: // must be the first char of an xml name
0413: if (mSt == 5) // misc after document's element
0414: panic(FAULT);
0415: // Document's element.
0416: back();
0417: mSt = 4; // document's element
0418: elm();
0419: mSt = 5; // misc after document's element
0420: break;
0421: }
0422: break;
0423:
0424: case ' ':
0425: // Skip white spaces
0426: break;
0427:
0428: default:
0429: panic(FAULT);
0430: }
0431: }
0432: if (mSt != 5) // misc after document's element
0433: panic(FAULT);
0434: } finally {
0435: mHand.endDocument();
0436: while (mAttL != null) {
0437: while (mAttL.list != null) {
0438: if (mAttL.list.list != null)
0439: del(mAttL.list.list);
0440: mAttL.list = del(mAttL.list);
0441: }
0442: mAttL = del(mAttL);
0443: }
0444: while (mElm != null)
0445: mElm = del(mElm);
0446: while (mPref != mXml)
0447: mPref = del(mPref);
0448: while (mInp != null)
0449: pop();
0450: if ((mDoc != null) && (mDoc.src != null)) {
0451: try {
0452: mDoc.src.close();
0453: } catch (IOException ioe) {
0454: }
0455: }
0456: mPEnt = null;
0457: mEnt = null;
0458: mDoc = null;
0459: mHand = null;
0460: mSt = 0;
0461: }
0462: }
0463:
0464: /**
0465: * Parses the document type declaration.
0466: *
0467: * @exception SAXException
0468: * @exception IOException
0469: */
0470: private void dtd() throws SAXException, IOException {
0471: char ch;
0472: String str = null;
0473: String name = null;
0474: Pair psid = null;
0475: // read 'DOCTYPE'
0476: if ("DOCTYPE".equals(name(false)) != true)
0477: panic(FAULT);
0478: mSt = 2; // DTD
0479: for (short st = 0; st >= 0;) {
0480: ch = next();
0481: switch (st) {
0482: case 0: // read the document type name
0483: if (chtyp(ch) != ' ') {
0484: back();
0485: name = name(mIsNSAware);
0486: wsskip();
0487: st = 1; // read 'PUPLIC' or 'SYSTEM'
0488: }
0489: break;
0490:
0491: case 1: // read 'PUPLIC' or 'SYSTEM'
0492: switch (chtyp(ch)) {
0493: case 'A':
0494: back();
0495: psid = pubsys(' ');
0496: st = 2; // skip spaces before internal subset
0497: break;
0498:
0499: case '[':
0500: back();
0501: st = 2; // skip spaces before internal subset
0502: break;
0503:
0504: case '>':
0505: back();
0506: st = 3; // skip spaces after internal subset
0507: break;
0508:
0509: default:
0510: panic(FAULT);
0511: }
0512: break;
0513:
0514: case 2: // skip spaces before internal subset
0515: switch (chtyp(ch)) {
0516: case '[':
0517: // Process internal subset
0518: dtdsub();
0519: st = 3; // skip spaces after internal subset
0520: break;
0521:
0522: case '>':
0523: // There is no internal subset
0524: back();
0525: st = 3; // skip spaces after internal subset
0526: break;
0527:
0528: case ' ':
0529: // skip white spaces
0530: break;
0531:
0532: default:
0533: panic(FAULT);
0534: }
0535: break;
0536:
0537: case 3: // skip spaces after internal subset
0538: switch (chtyp(ch)) {
0539: case '>':
0540: if (psid != null) {
0541: // Report the DTD external subset
0542: InputSource is = mHand.resolveEntity(psid.name,
0543: psid.value);
0544: if (is != null) {
0545: if (mIsSAlone == false) {
0546: // Set the end of DTD external subset char
0547: back();
0548: setch(']');
0549: // Set the DTD external subset InputSource
0550: push(new Input(BUFFSIZE_READER));
0551: setinp(is);
0552: mInp.pubid = psid.name;
0553: mInp.sysid = psid.value;
0554: // Parse the DTD external subset
0555: dtdsub();
0556: } else {
0557: // Unresolved DTD external subset
0558: mHand.skippedEntity("[dtd]");
0559: // Release reader and stream
0560: if (is.getCharacterStream() != null) {
0561: try {
0562: is.getCharacterStream().close();
0563: } catch (IOException ioe) {
0564: }
0565: }
0566: if (is.getByteStream() != null) {
0567: try {
0568: is.getByteStream().close();
0569: } catch (IOException ioe) {
0570: }
0571: }
0572: }
0573: } else {
0574: // Unresolved DTD external subset
0575: mHand.skippedEntity("[dtd]");
0576: }
0577: del(psid);
0578: }
0579: st = -1; // end of DTD
0580: break;
0581:
0582: case ' ':
0583: // skip white spaces
0584: break;
0585:
0586: default:
0587: panic(FAULT);
0588: }
0589: break;
0590:
0591: default:
0592: panic(FAULT);
0593: }
0594: }
0595: mSt = 3; // misc after DTD
0596: }
0597:
0598: /**
0599: * Parses the document type declaration subset.
0600: *
0601: * @exception SAXException
0602: * @exception IOException
0603: */
0604: private void dtdsub() throws SAXException, IOException {
0605: char ch;
0606: for (short st = 0; st >= 0;) {
0607: ch = next();
0608: switch (st) {
0609: case 0: // skip white spaces before a declaration
0610: switch (chtyp(ch)) {
0611: case '<':
0612: ch = next();
0613: switch (ch) {
0614: case '?':
0615: pi();
0616: break;
0617:
0618: case '!':
0619: ch = next();
0620: back();
0621: if (ch == '-') {
0622: comm();
0623: break;
0624: }
0625: // markup or entity declaration
0626: bntok();
0627: switch (bkeyword()) {
0628: case 'n':
0629: dtdent();
0630: break;
0631:
0632: case 'a':
0633: dtdattl(); // parse attributes declaration
0634: break;
0635:
0636: case 'e':
0637: dtdelm(); // parse element declaration
0638: break;
0639:
0640: case 'o':
0641: dtdnot(); // parse notation declaration
0642: break;
0643:
0644: default:
0645: panic(FAULT); // unsupported markup declaration
0646: break;
0647: }
0648: st = 1; // read the end of declaration
0649: break;
0650:
0651: default:
0652: panic(FAULT);
0653: break;
0654: }
0655: break;
0656:
0657: case '%':
0658: // A parameter entity reference
0659: pent(' ');
0660: break;
0661:
0662: case ']':
0663: // End of DTD subset
0664: st = -1;
0665: break;
0666:
0667: case ' ':
0668: // Skip white spaces
0669: break;
0670:
0671: case 'Z':
0672: // End of stream
0673: if (next() != ']')
0674: panic(FAULT);
0675: st = -1;
0676: break;
0677:
0678: default:
0679: panic(FAULT);
0680: }
0681: break;
0682:
0683: case 1: // read the end of declaration
0684: switch (ch) {
0685: case '>': // there is no notation
0686: st = 0; // skip white spaces before a declaration
0687: break;
0688:
0689: case ' ':
0690: case '\n':
0691: case '\r':
0692: case '\t':
0693: // Skip white spaces
0694: break;
0695:
0696: default:
0697: panic(FAULT);
0698: break;
0699: }
0700: break;
0701:
0702: default:
0703: panic(FAULT);
0704: }
0705: }
0706: }
0707:
0708: /**
0709: * Parses an entity declaration.
0710: * This method fills the general (<code>mEnt</code>) and parameter
0711: * (<code>mPEnt</code>) entity look up table.
0712: *
0713: * @exception SAXException
0714: * @exception IOException
0715: */
0716: private void dtdent() throws SAXException, IOException {
0717: String str = null;
0718: char[] val = null;
0719: Input inp = null;
0720: Pair ids = null;
0721: char ch;
0722: for (short st = 0; st >= 0;) {
0723: ch = next();
0724: switch (st) {
0725: case 0: // skip white spaces before entity name
0726: switch (chtyp(ch)) {
0727: case ' ':
0728: // Skip white spaces
0729: break;
0730:
0731: case '%':
0732: // Parameter entity or parameter entity declaration.
0733: ch = next();
0734: back();
0735: if (chtyp(ch) == ' ') {
0736: // Parameter entity declaration.
0737: wsskip();
0738: str = name(false);
0739: switch (chtyp(wsskip())) {
0740: case 'A':
0741: // Read the external identifier
0742: ids = pubsys(' ');
0743: if (wsskip() == '>') {
0744: // External parsed entity
0745: if (mPEnt.containsKey(str) == false) { // [#4.2]
0746: inp = new Input();
0747: inp.pubid = ids.name;
0748: inp.sysid = ids.value;
0749: mPEnt.put(str, inp);
0750: }
0751: } else {
0752: panic(FAULT);
0753: }
0754: del(ids);
0755: st = -1; // the end of declaration
0756: break;
0757:
0758: case '\"':
0759: case '\'':
0760: // Read the parameter entity value
0761: bqstr('d');
0762: // Create the parameter entity value
0763: val = new char[mBuffIdx + 1];
0764: System.arraycopy(mBuff, 1, val, 1,
0765: val.length - 1);
0766: // Add surrounding spaces [#4.4.8]
0767: val[0] = ' ';
0768: // Add the entity to the entity look up table
0769: if (mPEnt.containsKey(str) == false) { // [#4.2]
0770: inp = new Input(val);
0771: inp.pubid = mInp.pubid;
0772: inp.sysid = mInp.sysid;
0773: mPEnt.put(str, inp);
0774: }
0775: st = -1; // the end of declaration
0776: break;
0777:
0778: default:
0779: panic(FAULT);
0780: break;
0781: }
0782: } else {
0783: // Parameter entity reference.
0784: pent(' ');
0785: }
0786: break;
0787:
0788: default:
0789: back();
0790: str = name(false);
0791: st = 1; // read entity declaration value
0792: break;
0793: }
0794: break;
0795:
0796: case 1: // read entity declaration value
0797: switch (chtyp(ch)) {
0798: case '\"': // internal entity
0799: case '\'':
0800: back();
0801: bqstr('d'); // read a string into the buffer
0802: if (mEnt.get(str) == null) {
0803: // Create general entity value
0804: val = new char[mBuffIdx];
0805: System.arraycopy(mBuff, 1, val, 0, val.length);
0806: // Add the entity to the entity look up table
0807: if (mEnt.containsKey(str) == false) { // [#4.2]
0808: inp = new Input(val);
0809: inp.pubid = mInp.pubid;
0810: inp.sysid = mInp.sysid;
0811: mEnt.put(str, inp);
0812: }
0813: }
0814: st = -1; // the end of declaration
0815: break;
0816:
0817: case 'A': // external entity
0818: back();
0819: ids = pubsys(' ');
0820: switch (wsskip()) {
0821: case '>': // external parsed entity
0822: if (mEnt.containsKey(str) == false) { // [#4.2]
0823: inp = new Input();
0824: inp.pubid = ids.name;
0825: inp.sysid = ids.value;
0826: mEnt.put(str, inp);
0827: }
0828: break;
0829:
0830: case 'N': // external general unparsed entity
0831: if ("NDATA".equals(name(false)) == true) {
0832: wsskip();
0833: mHand.unparsedEntityDecl(str, ids.name,
0834: ids.value, name(false));
0835: break;
0836: }
0837: default:
0838: panic(FAULT);
0839: break;
0840: }
0841: del(ids);
0842: st = -1; // the end of declaration
0843: break;
0844:
0845: case ' ':
0846: // Skip white spaces
0847: break;
0848:
0849: default:
0850: panic(FAULT);
0851: break;
0852: }
0853: break;
0854:
0855: default:
0856: panic(FAULT);
0857: }
0858: }
0859: }
0860:
0861: /**
0862: * Parses an element declaration.
0863: *
0864: * This method parses the declaration up to the closing angle
0865: * bracket.
0866: *
0867: * @exception SAXException
0868: * @exception IOException
0869: */
0870: private void dtdelm() throws SAXException, IOException {
0871: // This is stub implementation which skips an element
0872: // declaration.
0873: wsskip();
0874: name(mIsNSAware);
0875:
0876: char ch;
0877: while (true) {
0878: ch = next();
0879: switch (ch) {
0880: case '>':
0881: back();
0882: return;
0883:
0884: case EOS:
0885: panic(FAULT);
0886:
0887: default:
0888: break;
0889: }
0890: }
0891: }
0892:
0893: /**
0894: * Parses an attribute list declaration.
0895: *
0896: * This method parses the declaration up to the closing angle
0897: * bracket.
0898: *
0899: * @exception SAXException
0900: * @exception IOException
0901: */
0902: private void dtdattl() throws SAXException, IOException {
0903: char elmqn[] = null;
0904: Pair elm = null;
0905: char ch;
0906: for (short st = 0; st >= 0;) {
0907: ch = next();
0908: switch (st) {
0909: case 0: // read the element name
0910: switch (chtyp(ch)) {
0911: case 'a':
0912: case 'A':
0913: case '_':
0914: case 'X':
0915: case ':':
0916: back();
0917: // Get the element from the list or add a new one.
0918: elmqn = qname(mIsNSAware);
0919: elm = find(mAttL, elmqn);
0920: if (elm == null) {
0921: elm = pair(mAttL);
0922: elm.chars = elmqn;
0923: mAttL = elm;
0924: }
0925: st = 1; // read an attribute declaration
0926: break;
0927:
0928: case ' ':
0929: break;
0930:
0931: case '%':
0932: pent(' ');
0933: break;
0934:
0935: default:
0936: panic(FAULT);
0937: break;
0938: }
0939: break;
0940:
0941: case 1: // read an attribute declaration
0942: switch (chtyp(ch)) {
0943: case 'a':
0944: case 'A':
0945: case '_':
0946: case 'X':
0947: case ':':
0948: back();
0949: dtdatt(elm);
0950: if (wsskip() == '>')
0951: return;
0952: break;
0953:
0954: case ' ':
0955: break;
0956:
0957: case '%':
0958: pent(' ');
0959: break;
0960:
0961: default:
0962: panic(FAULT);
0963: break;
0964: }
0965: break;
0966:
0967: default:
0968: panic(FAULT);
0969: break;
0970: }
0971: }
0972: }
0973:
0974: /**
0975: * Parses an attribute declaration.
0976: *
0977: * The attribut uses the following fields of Pair object:
0978: * chars - characters of qualified name
0979: * id - the type identifier of the attribute
0980: * list - a pair which holds the default value (chars field)
0981: *
0982: * @param elm An object which reprecents all defined attributes on an element.
0983: * @exception SAXException
0984: * @exception IOException
0985: */
0986: private void dtdatt(Pair elm) throws SAXException, IOException {
0987: char attqn[] = null;
0988: Pair att = null;
0989: char ch;
0990: for (short st = 0; st >= 0;) {
0991: ch = next();
0992: switch (st) {
0993: case 0: // the attribute name
0994: switch (chtyp(ch)) {
0995: case 'a':
0996: case 'A':
0997: case '_':
0998: case 'X':
0999: case ':':
1000: back();
1001: // Get the attribut from the list or add a new one.
1002: attqn = qname(mIsNSAware);
1003: att = find(elm.list, attqn);
1004: if (att == null) {
1005: // New attribute declaration
1006: att = pair(elm.list);
1007: att.chars = attqn;
1008: elm.list = att;
1009: } else {
1010: // Do not override the attribute declaration [#3.3]
1011: att = pair(null);
1012: att.chars = attqn;
1013: att.id = 'c';
1014: }
1015: wsskip();
1016: st = 1;
1017: break;
1018:
1019: case '%':
1020: pent(' ');
1021: break;
1022:
1023: case ' ':
1024: break;
1025:
1026: default:
1027: panic(FAULT);
1028: break;
1029: }
1030: break;
1031:
1032: case 1: // the attribute type
1033: switch (chtyp(ch)) {
1034: case '(':
1035: att.id = 'u'; // enumeration type
1036: st = 2; // read the first element of the list
1037: break;
1038:
1039: case '%':
1040: pent(' ');
1041: break;
1042:
1043: case ' ':
1044: break;
1045:
1046: default:
1047: back();
1048: bntok(); // read type id
1049: att.id = bkeyword();
1050: switch (att.id) {
1051: case 'o': // NOTATION
1052: if (wsskip() != '(')
1053: panic(FAULT);
1054: ch = next();
1055: st = 2; // read the first element of the list
1056: break;
1057:
1058: case 'i': // ID
1059: case 'r': // IDREF
1060: case 'R': // IDREFS
1061: case 'n': // ENTITY
1062: case 'N': // ENTITIES
1063: case 't': // NMTOKEN
1064: case 'T': // NMTOKENS
1065: case 'c': // CDATA
1066: wsskip();
1067: st = 4; // read default declaration
1068: break;
1069:
1070: default:
1071: panic(FAULT);
1072: break;
1073: }
1074: break;
1075: }
1076: break;
1077:
1078: case 2: // read the first element of the list
1079: switch (chtyp(ch)) {
1080: case 'a':
1081: case 'A':
1082: case 'd':
1083: case '.':
1084: case ':':
1085: case '-':
1086: case '_':
1087: case 'X':
1088: back();
1089: switch (att.id) {
1090: case 'u': // enumeration type
1091: bntok();
1092: break;
1093:
1094: case 'o': // NOTATION
1095: mBuffIdx = -1;
1096: bname(false);
1097: break;
1098:
1099: default:
1100: panic(FAULT);
1101: break;
1102: }
1103: wsskip();
1104: st = 3; // read next element of the list
1105: break;
1106:
1107: case '%':
1108: pent(' ');
1109: break;
1110:
1111: case ' ':
1112: break;
1113:
1114: default:
1115: panic(FAULT);
1116: break;
1117: }
1118: break;
1119:
1120: case 3: // read next element of the list
1121: switch (ch) {
1122: case ')':
1123: wsskip();
1124: st = 4; // read default declaration
1125: break;
1126:
1127: case '|':
1128: wsskip();
1129: switch (att.id) {
1130: case 'u': // enumeration type
1131: bntok();
1132: break;
1133:
1134: case 'o': // NOTATION
1135: mBuffIdx = -1;
1136: bname(false);
1137: break;
1138:
1139: default:
1140: panic(FAULT);
1141: break;
1142: }
1143: wsskip();
1144: break;
1145:
1146: case '%':
1147: pent(' ');
1148: break;
1149:
1150: default:
1151: panic(FAULT);
1152: break;
1153: }
1154: break;
1155:
1156: case 4: // read default declaration
1157: switch (ch) {
1158: case '#':
1159: bntok();
1160: switch (bkeyword()) {
1161: case 'F': // FIXED
1162: switch (wsskip()) {
1163: case '\"':
1164: case '\'':
1165: st = 5; // read the default value
1166: break;
1167:
1168: default:
1169: st = -1;
1170: break;
1171: }
1172: break;
1173:
1174: case 'Q': // REQUIRED
1175: case 'I': // IMPLIED
1176: st = -1;
1177: break;
1178:
1179: default:
1180: panic(FAULT);
1181: break;
1182: }
1183: break;
1184:
1185: case '\"':
1186: case '\'':
1187: back();
1188: st = 5; // read the default value
1189: break;
1190:
1191: case ' ':
1192: case '\n':
1193: case '\r':
1194: case '\t':
1195: break;
1196:
1197: case '%':
1198: pent(' ');
1199: break;
1200:
1201: default:
1202: back();
1203: st = -1;
1204: break;
1205: }
1206: break;
1207:
1208: case 5: // read the default value
1209: switch (ch) {
1210: case '\"':
1211: case '\'':
1212: back();
1213: bqstr('d'); // the value in the mBuff now
1214: att.list = pair(null);
1215: // Create a string like "attqname='value' "
1216: att.list.chars = new char[att.chars.length
1217: + mBuffIdx + 3];
1218: System.arraycopy(att.chars, 1, att.list.chars, 0,
1219: att.chars.length - 1);
1220: att.list.chars[att.chars.length - 1] = '=';
1221: att.list.chars[att.chars.length] = ch;
1222: System.arraycopy(mBuff, 1, att.list.chars,
1223: att.chars.length + 1, mBuffIdx);
1224: att.list.chars[att.chars.length + mBuffIdx + 1] = ch;
1225: att.list.chars[att.chars.length + mBuffIdx + 2] = ' ';
1226: st = -1;
1227: break;
1228:
1229: default:
1230: panic(FAULT);
1231: break;
1232: }
1233: break;
1234:
1235: default:
1236: panic(FAULT);
1237: break;
1238: }
1239: }
1240: }
1241:
1242: /**
1243: * Parses a notation declaration.
1244: *
1245: * This method parses the declaration up to the closing angle
1246: * bracket.
1247: *
1248: * @exception SAXException
1249: * @exception IOException
1250: */
1251: private void dtdnot() throws SAXException, IOException {
1252: wsskip();
1253: String name = name(false);
1254: wsskip();
1255: Pair ids = pubsys('N');
1256: mHand.notationDecl(name, ids.name, ids.value);
1257: del(ids);
1258: }
1259:
1260: /**
1261: * Parses an element.
1262: *
1263: * This recursive method is responsible for prefix scope control
1264: * (<code>mPref</code>). When the element is leaving the scope all
1265: * prefixes defined within the element are removed from the prefix
1266: * stack.
1267: *
1268: * @exception SAXException
1269: * @exception IOException
1270: */
private void elm() throws SAXException, IOException {
    // Overview: reads the element name and its attributes, then runs a
    // small state machine over the rest of the start tag and the content:
    //   0 - expecting '>' or '/' after the attributes
    //   1 - '/' has been seen, expecting '>' of an empty element
    //   2 - collecting white space that follows a tag
    //   3 - collecting character data / dispatching on markup
    //  -1 - the element is complete

    // Save the current top of the prefix stack
    Pair pref = mPref;
    // Read an element name and put it on top of the element stack
    mElm = pair(mElm);
    mElm.chars = qname(mIsNSAware);
    mElm.name = mElm.local();
    // Find the list of defined attributes of the current element
    Pair elm = find(mAttL, mElm.chars);
    // Read attributes till the end of the element tag
    mAttrIdx = 0;
    Pair att = pair(null);
    att.list = (elm != null) ? elm.list : null; // attrs defined on this elm
    attr(att);
    del(att);
    // Read the element and its content
    mBuffIdx = -1;
    char ch;
    for (short st = 0; st >= 0;) {
        // Fast path: take the character straight from the current chunk,
        // fall back to next() when the chunk is exhausted.
        ch = (mChIdx < mChLen) ? mChars[mChIdx++] : next();
        switch (st) {
        case 0: // read the end of the element tag
        case 1: // read the end of the empty element
            switch (ch) {
            case '>':
                // Report the element
                if (mIsNSAware == true) {
                    // Namespace-aware: URI + local name, empty qName
                    mElm.value = rslv(mElm.chars);
                    mHand.startElement(mElm.value, mElm.name, "",
                            mAttrs);
                } else {
                    // Namespace-unaware: only the qName is supplied
                    mHand.startElement("", "", mElm.name, mAttrs);
                }
                // Drop the reference to the attribute items array that
                // attr() stored in mItems.
                mItems = null;
                // "<e>" continues with the content, "<e/>" is finished.
                st = (st == 0) ? (short) 2 : (short) -1;
                break;

            case '/':
                // Only one '/' is allowed before '>'
                if (st != 0)
                    panic(FAULT);
                st = 1;
                break;

            default:
                panic(FAULT);
            }
            break;

        case 2: // skip white space between tags
            switch (ch) {
            case ' ':
            case '\t':
            case '\n':
                // Kept in the buffer: it is reported as characters below
                bappend(ch);
                break;

            case '\r': // EOL processing [#2.11]
                // "\r\n" and a lone "\r" both become a single "\n"
                if (next() != '\n')
                    back();
                bappend('\n');
                break;

            case '<':
                // Need revisit: With additional info from DTD and xml:space attr [#2.10]
                // the following call can be supported:
                // mHand.ignorableWhitespace(mBuff, 0, (mBuffIdx + 1));
                bflash();
                // Intentional fall-through: the '<' is pushed back and
                // handled by state 3.

            default:
                back();
                st = 3;
                break;
            }
            break;

        case 3: // read the text content of the element
            switch (ch) {
            case '&':
                // 'x' lets ent() report an unresolved reference as skipped
                ent('x');
                break;

            case '<':
                // Report the text collected so far before handling markup
                bflash();
                switch (next()) {
                case '/': // the end of the element content
                    // Check element's open/close tags balance
                    mBuffIdx = -1;
                    bname(mIsNSAware);
                    char[] chars = mElm.chars;
                    // Same length and same characters from index 1 on;
                    // index 0 holds the colon offset, not a name character.
                    if (chars.length == (mBuffIdx + 1)) {
                        for (char i = 1; i <= mBuffIdx; i += 1) {
                            if (chars[i] != mBuff[i])
                                panic(FAULT);
                        }
                    } else {
                        panic(FAULT);
                    }
                    // Skip white spaces before '>'
                    if (wsskip() != '>')
                        panic(FAULT);
                    // wsskip() only looked ahead; consume the '>' now
                    ch = next();
                    st = -1;
                    break;

                case '!': // a comment or a CDATA
                    // Peek at the character after "<!" and push it back;
                    // comm() and cdat() re-read it themselves.
                    ch = next();
                    back();
                    switch (ch) {
                    case '-': // must be a comment
                        comm();
                        break;

                    case '[': // must be a CDATA section
                        cdat();
                        break;

                    default:
                        panic(FAULT);
                    }
                    break;

                case '?': // processing instruction
                    pi();
                    break;

                default: // must be the first char of an xml name
                    back();
                    elm(); // recursive call
                    break;
                }
                // The buffer was used by the nested construct; start clean.
                mBuffIdx = -1;
                // Unless the end tag was just read, go back to collecting
                // white space after the markup.
                if (st != -1)
                    st = 2;
                break;

            case '\r': // EOL processing [#2.11]
                if (next() != '\n')
                    back();
                bappend('\n');
                break;

            case EOS:
                // End of input inside an open element
                panic(FAULT);
                break;

            default:
                bappend(ch);
                break;
            }
            break;

        default:
            panic(FAULT);
        }
    }
    // Report the end of element
    if (mIsNSAware == true)
        mHand.endElement(mElm.value, mElm.name, "");
    else
        mHand.endElement("", "", mElm.name);
    // Remove the top element tag
    mElm = del(mElm);
    // Restore the top of the prefix stack
    while (mPref != pref) {
        mHand.endPrefixMapping(mPref.name);
        mPref = del(mPref);
    }
}
1439:
1440: /**
1441: * Parses an attribute.
1442: *
1443: * This recursive method is responsible for prefix addition
1444: * (<code>mPref</code>) and prefix mapping reports on the way down. The
1445: * element's start tag end triggers the return process. The method then
* on its way back resolves prefixes and accumulates attributes.<br />
1447: * Note that this method will not report namespace declaration attributes
1448: * (xmlns* attributes), the
1449: * {@link DefaultHandler#startPrefixMapping startPrefixMapping} method
1450: * is invoked instead.
1451: *
* @param att An object which represents the current attribute.
1453: * @exception SAXException
1454: * @exception IOException
1455: */
private void attr(Pair att) throws SAXException, IOException {
    // Overview: each invocation handles one attribute and recurses for
    // the following one. The end of the start tag ('/' or '>') stops the
    // descent; the attribute items are filled in while the recursion
    // unwinds, when the total number of attributes is known.
    Pair next = null;
    char norm = 'c'; // CDATA-type normalization by default [#3.3.3]
    String val;
    String type;
    try {
        switch (wsskip()) {
        case '/':
        case '>':
            // Go through all defined attributes on current tag to
            // find defaults
            for (Pair def = att.list; def != null; def = def.next) {
                if (def.list != null) {
                    // Attribute definition with default value
                    // Walk the attributes read so far (presumably linked
                    // through att.next by pair(att) -- confirm) looking
                    // for one with the same name.
                    Pair act = att.next;
                    while (act != null) {
                        if (act.eqname(def.chars) == true)
                            break;
                        act = act.next;
                    }
                    if (act == null) {
                        // Add default attribute
                        // The stored default text is pushed as input and
                        // parsed like an attribute written in the tag.
                        push(new Input(def.list.chars));
                        attr(att);
                        return;
                    }
                }
            }
            // Ensure the attribute string array capacity
            mAttrs.setLength(mAttrIdx);
            mItems = mAttrs.mItems;
            return;

        default:
            // Read the attribute name and value
            att.chars = qname(mIsNSAware);
            att.name = att.local();
            type = "CDATA";
            if (att.list != null) {
                // Map the declared type id to the reported type name
                // and to the normalization mode used for the value.
                Pair attr = find(att.list, att.chars);
                if (attr != null) {
                    switch (attr.id) {
                    case 'i':
                        type = "ID";
                        norm = 'i';
                        break;

                    case 'r':
                        type = "IDREF";
                        norm = 'i';
                        break;

                    case 'R':
                        type = "IDREFS";
                        norm = 'i';
                        break;

                    case 'n':
                        type = "ENTITY";
                        norm = 'i';
                        break;

                    case 'N':
                        type = "ENTITIES";
                        norm = 'i';
                        break;

                    case 't':
                        type = "NMTOKEN";
                        norm = 'i';
                        break;

                    case 'T':
                        type = "NMTOKENS";
                        norm = 'i';
                        break;

                    case 'u':
                        // NOTE(review): id 'u' is also reported as
                        // "NMTOKEN"; presumably an enumerated type --
                        // confirm against the ATTLIST parser.
                        type = "NMTOKEN";
                        norm = 'i';
                        break;

                    case 'o':
                        type = "NOTATION";
                        norm = 'i';
                        break;

                    case 'c':
                        norm = 'c';
                        break;

                    default:
                        panic(FAULT);
                        break;
                    }
                }
            }
            wsskip();
            if (next() != '=')
                panic(FAULT);
            bqstr(norm); // read the value with normalization.
            // The value starts at index 1 of the buffer; index 0 is the
            // placeholder that bqstr() appends first.
            val = new String(mBuff, 1, mBuffIdx);
            // Put a namespace declaration on top of the prefix stack
            if ((mIsNSAware == false)
                    || (isdecl(att, val) == false)) {
                // An ordinary attribute
                mAttrIdx++;
                // Recursive call to parse the next attribute
                next = pair(att);
                next.list = att.list;
                attr(next);
                // Back to this attribute's own index
                mAttrIdx--;
                // Add the attribute to the attributes string array
                // Each attribute owns a group of 8 slots; slots 0..4
                // are written here.
                char idx = (char) (mAttrIdx << 3);
                mItems[idx + 1] = att.qname(); // attr qname
                mItems[idx + 2] = (mIsNSAware) ? att.name : ""; // attr local name
                mItems[idx + 3] = val; // attr value
                mItems[idx + 4] = type; // attr type
                // Resolve the prefix if any and report the attribute
                // NOTE: The attribute does not accept the default namespace.
                mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars)
                        : "";
            } else {
                // A namespace declaration
                // Report a start of the new mapping
                mHand.startPrefixMapping(mPref.name, mPref.value);
                // Recursive call to parse the next attribute
                next = pair(att);
                next.list = att.list;
                attr(next);
                // NOTE: The namespace declaration is not reported.
            }
            break;
        }
    } finally {
        // Release the pair created for the following attribute, also
        // when parsing was aborted by an exception.
        if (next != null)
            del(next);
    }
}
1595:
1596: /**
1597: * Parses a comment.
1598: *
* @exception org.xml.sax.SAXException
1600: * @exception java.io.IOException
1601: */
1602: private void comm() throws SAXException, IOException {
1603: if (mSt == 0)
1604: mSt = 1; // misc before DTD
1605: char ch;
1606: for (short st = 0; st >= 0;) {
1607: ch = (mChIdx < mChLen) ? mChars[mChIdx++] : next();
1608: switch (st) {
1609: case 0: // first '-' of the comment open
1610: if (ch == '-')
1611: st = 1;
1612: else
1613: panic(FAULT);
1614: break;
1615:
1616: case 1: // secind '-' of the comment open
1617: if (ch == '-')
1618: st = 2;
1619: else
1620: panic(FAULT);
1621: break;
1622:
1623: case 2: // skip the comment body
1624: switch (ch) {
1625: case '-':
1626: st = 3;
1627: break;
1628:
1629: case EOS:
1630: panic(FAULT);
1631: break;
1632:
1633: default:
1634: break;
1635: }
1636: break;
1637:
1638: case 3: // second '-' of the comment close
1639: st = (ch == '-') ? (short) 4 : (short) 2;
1640: break;
1641:
1642: case 4: // '>' of the comment close
1643: if (ch == '>')
1644: st = -1;
1645: else
1646: panic(FAULT);
1647: break;
1648:
1649: default:
1650: panic(FAULT);
1651: }
1652: }
1653: }
1654:
1655: /**
1656: * Parses a processing instruction.
1657: *
1658: * @exception SAXException
1659: * @exception IOException
1660: */
1661: private void pi() throws SAXException, IOException {
1662: char ch;
1663: String str = null;
1664: mBuffIdx = -1;
1665: for (short st = 0; st >= 0;) {
1666: ch = next();
1667: switch (st) {
1668: case 0: // read the PI target name
1669: switch (chtyp(ch)) {
1670: case 'a':
1671: case 'A':
1672: case '_':
1673: case ':':
1674: case 'X':
1675: back();
1676: str = name(false);
1677: // PI target name may not be empty string [#2.6]
1678: // PI target name 'XML' is reserved [#2.6]
1679: if ((str.length() == 0)
1680: || (mXml.name.equals(str.toLowerCase()) == true))
1681: panic(FAULT);
1682: // This is processing instruction
1683: if (mSt == 0) // the begining of the document
1684: mSt = 1; // misc before DTD
1685: wsskip(); // skip spaces after the PI target name
1686: st = 1; // accumulate the PI body
1687: mBuffIdx = -1;
1688: break;
1689:
1690: case 'Z': // EOS
1691: panic(FAULT);
1692: break;
1693:
1694: default:
1695: panic(FAULT);
1696: }
1697: break;
1698:
1699: case 1: // accumulate the PI body
1700: switch (ch) {
1701: case '?':
1702: st = 2; // end of the PI body
1703: break;
1704:
1705: case EOS:
1706: panic(FAULT);
1707: break;
1708:
1709: default:
1710: bappend(ch);
1711: break;
1712: }
1713: break;
1714:
1715: case 2: // end of the PI body
1716: switch (ch) {
1717: case '>':
1718: // PI has been read.
1719: mHand.processingInstruction(str, new String(mBuff,
1720: 0, mBuffIdx + 1));
1721: st = -1;
1722: break;
1723:
1724: case '?':
1725: bappend('?');
1726: break;
1727:
1728: case EOS:
1729: panic(FAULT);
1730: break;
1731:
1732: default:
1733: bappend('?');
1734: bappend(ch);
1735: st = 1; // accumulate the PI body
1736: break;
1737: }
1738: break;
1739:
1740: default:
1741: panic(FAULT);
1742: }
1743: }
1744: }
1745:
1746: /**
* Parses a CDATA section.
1748: *
1749: * @exception SAXException
1750: * @exception IOException
1751: */
1752: private void cdat() throws SAXException, IOException {
1753: char ch;
1754: mBuffIdx = -1;
1755: for (short st = 0; st >= 0;) {
1756: ch = next();
1757: switch (st) {
1758: case 0: // the first '[' of the CDATA open
1759: if (ch == '[')
1760: st = 1;
1761: else
1762: panic(FAULT);
1763: break;
1764:
1765: case 1: // read "CDATA"
1766: if (chtyp(ch) == 'A') {
1767: bappend(ch);
1768: } else {
1769: if ("CDATA".equals(new String(mBuff, 0,
1770: mBuffIdx + 1)) != true)
1771: panic(FAULT);
1772: back();
1773: st = 2;
1774: }
1775: break;
1776:
1777: case 2: // the second '[' of the CDATA open
1778: if (ch != '[')
1779: panic(FAULT);
1780: mBuffIdx = -1;
1781: st = 3;
1782: break;
1783:
1784: case 3: // read data before the first ']'
1785: if (ch != ']')
1786: bappend(ch);
1787: else
1788: st = 4;
1789: break;
1790:
1791: case 4: // read the second ']' or continue to read the data
1792: if (ch != ']') {
1793: bappend(']');
1794: bappend(ch);
1795: st = 3;
1796: } else {
1797: st = 5;
1798: }
1799: break;
1800:
1801: case 5: // read '>' or continue to read the data
1802: switch (ch) {
1803: case ']':
1804: bappend(']');
1805: break;
1806:
1807: case '>':
1808: bflash();
1809: st = -1;
1810: break;
1811:
1812: default:
1813: bappend(']');
1814: bappend(']');
1815: bappend(ch);
1816: st = 3;
1817: break;
1818: }
1819: break;
1820:
1821: default:
1822: panic(FAULT);
1823: }
1824: }
1825: }
1826:
1827: /**
1828: * Reads a xml name.
1829: *
1830: * The xml name must conform "Namespaces in XML" specification. Therefore
1831: * the ':' character is not allowed in the name. This method should be
1832: * used for PI and entity names which may not have a namespace according
1833: * to the specification mentioned above.
1834: *
1835: * @param ns The true value turns namespace conformance on.
1836: * @return The name has been read.
1837: * @exception SAXException When incorrect character appear in the name.
1838: * @exception IOException
1839: */
1840: private String name(boolean ns) throws SAXException, IOException {
1841: mBuffIdx = -1;
1842: bname(ns);
1843: return new String(mBuff, 1, mBuffIdx);
1844: }
1845:
1846: /**
1847: * Reads a qualified xml name.
1848: *
1849: * The characters of a qualified name is an array of characters. The
1850: * first (chars[0]) character is the index of the colon character which
1851: * separates the prefix from the local name. If the index is zero, the
1852: * name does not contain separator or the parser works in the namespace
1853: * unaware mode. The length of qualified name is the length of the array
1854: * minus one.
1855: *
1856: * @param ns The true value turns namespace conformance on.
1857: * @return The characters of a qualified name.
1858: * @exception SAXException When incorrect character appear in the name.
1859: * @exception IOException
1860: */
1861: private char[] qname(boolean ns) throws SAXException, IOException {
1862: mBuffIdx = -1;
1863: bname(ns);
1864: char chars[] = new char[mBuffIdx + 1];
1865: System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
1866: return chars;
1867: }
1868:
1869: /**
1870: * Reads the public or/and system identifiers.
1871: *
1872: * @param inp The input object.
1873: * @exception SAXException
1874: * @exception IOException
1875: */
1876: private void pubsys(Input inp) throws SAXException, IOException {
1877: Pair pair = pubsys(' ');
1878: inp.pubid = pair.name;
1879: inp.sysid = pair.value;
1880: del(pair);
1881: }
1882:
1883: /**
1884: * Reads the public or/and system identifiers.
1885: *
1886: * @param flag The 'N' allows public id be without system id.
1887: * @return The public or/and system identifiers pair.
1888: * @exception SAXException
1889: * @exception IOException
1890: */
1891: private Pair pubsys(char flag) throws SAXException, IOException {
1892: Pair ids = pair(null);
1893: String str = name(false);
1894: if ("PUBLIC".equals(str) == true) {
1895: bqstr('i'); // non-CDATA normalization [#4.2.2]
1896: ids.name = new String(mBuff, 1, mBuffIdx);
1897: switch (wsskip()) {
1898: case '\"':
1899: case '\'':
1900: bqstr(' ');
1901: ids.value = new String(mBuff, 1, mBuffIdx);
1902: break;
1903:
1904: default:
1905: if (flag != 'N') // [#4.7]
1906: panic(FAULT);
1907: ids.value = null;
1908: break;
1909: }
1910: return ids;
1911: } else if ("SYSTEM".equals(str) == true) {
1912: ids.name = null;
1913: bqstr(' ');
1914: ids.value = new String(mBuff, 1, mBuffIdx);
1915: return ids;
1916: }
1917: panic(FAULT);
1918: return null;
1919: }
1920:
1921: /**
1922: * Reads an attribute value.
1923: *
1924: * The grammar which this method can read is:<br />
1925: * <code>eqstr := S "=" qstr</code><br />
1926: * <code>qstr := S ("'" string "'") |
1927: * ('"' string '"')</code><br />
1928: * This method resolves entities inside a string unless the parser
1929: * parses DTD.
1930: *
1931: * @param flag The '=' character forces the method
1932: * to accept the '=' character before quoted string.
* @return The string value that has been read.
1934: * @exception SAXException
1935: * @exception IOException
1936: */
1937: private String eqstr(char flag) throws SAXException, IOException {
1938: if (flag == '=') {
1939: wsskip();
1940: if (next() != '=')
1941: panic(FAULT);
1942: }
1943: bqstr('-');
1944: return new String(mBuff, 1, mBuffIdx);
1945: }
1946:
1947: /**
* Resolves an entity.
*
* This method resolves built-in and character entity references. It
* also reports external entities to the application.
1952: *
1953: * @param flag The 'x' character forces the method to report a skipped entity;
1954: * 'i' character - indicates non-CDATA normalization.
1955: * @exception SAXException
1956: * @exception IOException
1957: */
private void ent(char flag) throws SAXException, IOException {
    // Overview: the caller has consumed '&'. The reference text is
    // accumulated in the buffer after position idx so it can either be
    // kept verbatim or discarded by restoring mBuffIdx.
    // States: 0 - first character after '&'; 1 - inside an entity name;
    // 2 - decimal character reference; 3 - hexadecimal character
    // reference.
    char ch;
    int idx = mBuffIdx + 1; // buffer position of the '&'
    Input inp = null;
    String str = null;
    mESt = 0x100; // reset the built-in entity recognizer
    bappend('&');
    for (short st = 0; st >= 0;) {
        ch = (mChIdx < mChLen) ? mChars[mChIdx++] : next();
        switch (st) {
        case 0: // the first character of the entity name
        case 1: // read built-in entity name
            switch (chtyp(ch)) {
            case 'd':
            case '.':
            case '-':
                // Not allowed as the first character of a name
                if (st != 1)
                    panic(FAULT);
                // Intentional fall-through: an ordinary name character
            case 'a':
            case 'A':
            case '_':
            case 'X':
                bappend(ch);
                eappend(ch);
                st = 1;
                break;

            case ':':
                // A colon is accepted only in namespace-unaware mode
                if (mIsNSAware != false)
                    panic(FAULT);
                bappend(ch);
                eappend(ch);
                st = 1;
                break;

            case ';':
                if (mESt < 0x100) {
                    // The entity is a built-in entity
                    // Replace "&name" in the buffer with the character
                    // left in mESt by the recognizer.
                    mBuffIdx = idx - 1;
                    bappend(mESt);
                    st = -1;
                    break;
                } else if (mSt == 2) {
                    // In DTD entity declaration has to resolve character
                    // entities and include "as is" others. [#4.4.7]
                    bappend(';');
                    st = -1;
                    break;
                }
                // Convert an entity name to a string
                str = new String(mBuff, idx + 1, mBuffIdx - idx);
                inp = (Input) mEnt.get(str);
                // Restore the buffer offset
                mBuffIdx = idx - 1;
                if (inp != null) {
                    if (inp.chars == null) {
                        // External entity
                        InputSource is = mHand.resolveEntity(
                                inp.pubid, inp.sysid);
                        if (is != null) {
                            // Continue parsing from the resolved source
                            push(new Input(BUFFSIZE_READER));
                            setinp(is);
                            mInp.pubid = inp.pubid;
                            mInp.sysid = inp.sysid;
                        } else {
                            // Unresolved external entity
                            bflash();
                            if (flag != 'x')
                                panic(FAULT); // unknown entity within markup
                            mHand.skippedEntity(str);
                        }
                    } else {
                        // Internal entity
                        push(inp);
                    }
                } else {
                    // Unknown or general unparsed entity
                    bflash();
                    if (flag != 'x')
                        panic(FAULT); // unknown entity within markup
                    mHand.skippedEntity(str);
                }
                st = -1;
                break;

            case '#':
                // '#' is valid only right after '&'
                if (st != 0)
                    panic(FAULT);
                st = 2;
                break;

            default:
                panic(FAULT);
            }
            break;

        case 2: // read character entity
            switch (chtyp(ch)) {
            case 'd':
                bappend(ch);
                break;

            case ';':
                // Convert the character entity to a character
                try {
                    int i = Integer.parseInt(new String(mBuff,
                            idx + 1, mBuffIdx - idx), 10);
                    // Code points of 0xffff and above are rejected
                    if (i >= 0xffff)
                        panic(FAULT);
                    ch = (char) i;
                } catch (NumberFormatException nfe) {
                    panic(FAULT);
                }
                // Restore the buffer offset
                mBuffIdx = idx - 1;
                // A space, or any character read while mInp.next is set
                // (presumably a nested entity input -- confirm), goes
                // through the flag-dependent two-argument bappend.
                if (ch == ' ' || mInp.next != null)
                    bappend(ch, flag);
                else
                    bappend(ch);
                st = -1;
                break;

            case 'a':
                // If the entity buffer is empty and ch == 'x'
                if ((mBuffIdx == idx) && (ch == 'x')) {
                    st = 3;
                    break;
                }
                // Intentional fall-through: any other letter is an error
            default:
                panic(FAULT);
            }
            break;

        case 3: // read hex character entity
            switch (chtyp(ch)) {
            case 'A':
            case 'a':
            case 'd':
                // Letters outside a-f / A-F are rejected later, when
                // parseInt throws NumberFormatException
                bappend(ch);
                break;

            case ';':
                // Convert the character entity to a character
                try {
                    int i = Integer.parseInt(new String(mBuff,
                            idx + 1, mBuffIdx - idx), 16);
                    // Code points of 0xffff and above are rejected
                    if (i >= 0xffff)
                        panic(FAULT);
                    ch = (char) i;
                } catch (NumberFormatException nfe) {
                    panic(FAULT);
                }
                // Restore the buffer offset
                mBuffIdx = idx - 1;
                // Same append rule as for decimal references
                if (ch == ' ' || mInp.next != null)
                    bappend(ch, flag);
                else
                    bappend(ch);
                st = -1;
                break;

            default:
                panic(FAULT);
            }
            break;

        default:
            panic(FAULT);
        }
    }
}
2129:
2130: /**
* Resolves a parameter entity.
*
* This method resolves parameter entity references. It also reports
* external entities to the application.
2135: *
* @param flag The '-' instructs the method not to set up surrounding
* spaces [#4.4.8].
2138: * @exception SAXException
2139: * @exception IOException
2140: */
2141: private void pent(char flag) throws SAXException, IOException {
2142: char ch;
2143: int idx = mBuffIdx + 1;
2144: Input inp = null;
2145: String str = null;
2146: bappend('%');
2147: if (mSt != 2) // the DTD internal subset
2148: return; // Not Recognized [#4.4.1]
2149: // Read entity name
2150: bname(false);
2151: str = new String(mBuff, idx + 2, mBuffIdx - idx - 1);
2152: if (next() != ';')
2153: panic(FAULT);
2154: inp = (Input) mPEnt.get(str);
2155: // Restore the buffer offset
2156: mBuffIdx = idx - 1;
2157: if (inp != null) {
2158: if (inp.chars == null) {
2159: // External parameter entity
2160: InputSource is = mHand.resolveEntity(inp.pubid,
2161: inp.sysid);
2162: if (is != null) {
2163: if (flag != '-')
2164: bappend(' '); // tail space
2165: push(new Input(BUFFSIZE_READER));
2166: // Need revisit: there is no leading space! [#4.4.8]
2167: setinp(is);
2168: mInp.pubid = inp.pubid;
2169: mInp.sysid = inp.sysid;
2170: } else {
2171: // Unresolved external parameter entity
2172: mHand.skippedEntity("%" + str);
2173: }
2174: } else {
2175: // Internal parameter entity
2176: if (flag == '-') {
2177: // No surrounding spaces
2178: inp.chIdx = 1;
2179: } else {
2180: // Insert surrounding spaces
2181: bappend(' '); // tail space
2182: inp.chIdx = 0;
2183: }
2184: push(inp);
2185: }
2186: } else {
2187: // Unknown parameter entity
2188: mHand.skippedEntity("%" + str);
2189: }
2190: }
2191:
2192: /**
2193: * Recognizes and handles a namespace declaration.
2194: *
2195: * This method identifies a type of namespace declaration if any and
2196: * puts new mapping on top of prefix stack.
2197: *
2198: * @param name The attribute qualified name (<code>name.value</code> is a
2199: * <code>String</code> object which represents the attribute prefix).
2200: * @param value The attribute value.
2201: * @return <code>true</code> if a namespace declaration is recognized.
2202: */
2203: private boolean isdecl(Pair name, String value) {
2204: if (name.chars[0] == 0) {
2205: if ("xmlns".equals(name.name) == true) {
2206: // New default namespace declaration
2207: mPref = pair(mPref);
2208: mPref.value = value;
2209: mPref.name = "";
2210: mPref.chars = NONS;
2211: return true;
2212: }
2213: } else {
2214: if (name.eqpref(XMLNS) == true) {
2215: // New prefix declaration
2216: int len = name.name.length();
2217: mPref = pair(mPref);
2218: mPref.value = value;
2219: mPref.name = name.name;
2220: mPref.chars = new char[len + 1];
2221: mPref.chars[0] = (char) (len + 1);
2222: name.name.getChars(0, len, mPref.chars, 1);
2223: return true;
2224: }
2225: }
2226: return false;
2227: }
2228:
2229: /**
2230: * Resolves a prefix.
2231: *
2232: * @return The namespace assigned to the prefix.
2233: * @exception SAXException When mapping for specified prefix is not found.
2234: */
2235: private String rslv(char[] qname) throws SAXException {
2236: for (Pair pref = mPref; pref != null; pref = pref.next) {
2237: if (pref.eqpref(qname) == true)
2238: return pref.value;
2239: }
2240: if (qname[0] == 1) { // QNames like ':local'
2241: for (Pair pref = mPref; pref != null; pref = pref.next) {
2242: if (pref.chars[0] == 0)
2243: return pref.value;
2244: }
2245: }
2246: panic(FAULT);
2247: return null;
2248: }
2249:
2250: /**
2251: * Skips xml white space characters.
2252: *
* This method skips white space characters (' ', '\t', '\n', '\r') and
* looks ahead at the first non-white-space character.
2255: *
2256: * @return The first not white space look ahead character.
2257: * @exception SAXException When End Of Stream character typed.
2258: * @exception IOException
2259: */
2260: private char wsskip() throws SAXException, IOException {
2261: char ch;
2262: char type;
2263: while (true) {
2264: // Read next character
2265: ch = (mChIdx < mChLen) ? mChars[mChIdx++] : next();
2266: type = (char) 0; // [X]
2267: if (ch < 0x80) {
2268: type = (char) nmttyp[ch];
2269: } else if (ch == EOS) {
2270: panic(FAULT);
2271: }
2272: if (type != 3) { // [ \t\n\r]
2273: mChIdx--; // back();
2274: return ch;
2275: }
2276: }
2277: }
2278:
2279: /**
2280: * Notifies the handler about fatal parsing error.
2281: *
2282: * @param msg The problem description message.
2283: */
2284: private void panic(String msg) throws SAXException {
2285: SAXParseException spe = new SAXParseException(msg, this );
2286: mHand.fatalError(spe);
2287: throw spe; // [#1.2] fatal error definition
2288: }
2289:
2290: /**
2291: * Reads a qualified xml name.
2292: *
2293: * This is low level routine which leaves a qName in the buffer.
2294: * The characters of a qualified name is an array of characters. The
2295: * first (chars[0]) character is the index of the colon character which
2296: * separates the prefix from the local name. If the index is zero, the
2297: * name does not contain separator or the parser works in the namespace
2298: * unaware mode. The length of qualified name is the length of the array
2299: * minus one.
2300: *
2301: * @param ns The true value turns namespace conformance on.
2302: * @exception SAXException When incorrect character appear in the name.
2303: * @exception IOException
2304: */
private void bname(boolean ns) throws SAXException, IOException {
    // Overview: name characters are not appended one by one. The method
    // tracks a run of accepted characters in the input chunk (starting
    // at cstart) together with the buffer position where that run
    // belongs (bstart), and hands the run to bcopy() when the chunk is
    // exhausted or the name ends.
    // States: 0/2 - first character of the prefix / of the local part;
    // 1/3 - remaining characters of the prefix / of the local part.
    // With ns == false the scan starts directly in state 2.
    char ch;
    char type;
    mBuffIdx++; // allocate a char for colon offset
    int bqname = mBuffIdx; // buffer slot that receives the colon offset
    int bcolon = bqname; // colon position; equal to bqname while none seen
    int bchidx = bqname + 1; // buffer position for the next name char
    int bstart = bchidx; // buffer position of the pending run
    int cstart = mChIdx; // input position of the pending run
    short st = (short) ((ns == true) ? 0 : 2);
    while (true) {
        // Read next character
        if (mChIdx >= mChLen) {
            // The chunk is exhausted: save the pending run, then call
            // next() and step back so the character is read again below.
            bcopy(cstart, bstart);
            next();
            mChIdx--; // back();
            cstart = mChIdx;
            bstart = bchidx;
        }
        ch = mChars[mChIdx++];
        // Characters of 0x80 and above keep class 0, except EOS which
        // is fatal; the rest are classified through the nmttyp table.
        type = (char) 0; // [X]
        if (ch < 0x80) {
            type = (char) nmttyp[ch];
        } else if (ch == EOS) {
            panic(FAULT);
        }
        // Parse QName
        switch (st) {
        case 0: // read the first char of the prefix
        case 2: // read the first char of the suffix
            switch (type) {
            case 0: // [aA_X]
                bchidx++; // append char to the buffer
                st++; // (st == 0)? 1: 3;
                break;

            case 1: // [:]
                // Step back: the colon is handled by state 1 or 3
                mChIdx--; // back();
                st++; // (st == 0)? 1: 3;
                break;

            default:
                panic(FAULT);
            }
            break;

        case 1: // read the prefix
        case 3: // read the suffix
            switch (type) {
            case 0: // [aA_X]
            case 2: // [.-d]
                bchidx++; // append char to the buffer
                break;

            case 1: // [:]
                bchidx++; // append char to the buffer
                if (ns == true) {
                    if (bcolon != bqname)
                        panic(FAULT); // it must be only one colon
                    bcolon = bchidx - 1;
                    // The prefix is complete; the local part must start
                    // with a name start character.
                    if (st == 1)
                        st = 2;
                }
                break;

            default:
                // End of the name: leave the terminating character in
                // the input, save the pending run and store the colon
                // offset (zero when no colon was recorded).
                mChIdx--; // back();
                bcopy(cstart, bstart);
                mBuff[bqname] = (char) (bcolon - bqname);
                return;
            }
            break;

        default:
            panic(FAULT);
        }
    }
}
2383:
2384: /**
2385: * Reads a nmtoken.
2386: *
2387: * This is low level routine which leaves a nmtoken in the buffer.
2388: *
2389: * @exception SAXException When incorrect character appear in the name.
2390: * @exception IOException
2391: */
2392: private void bntok() throws SAXException, IOException {
2393: char ch;
2394: mBuffIdx = -1;
2395: bappend((char) 0); // default offset to the colon char
2396: while (true) {
2397: ch = next();
2398: switch (chtyp(ch)) {
2399: case 'a':
2400: case 'A':
2401: case 'd':
2402: case '.':
2403: case ':':
2404: case '-':
2405: case '_':
2406: case 'X':
2407: bappend(ch);
2408: break;
2409:
2410: default:
2411: back();
2412: return;
2413: }
2414: }
2415: }
2416:
2417: /**
2418: * Recognizes a keyword.
2419: *
2420: * This is low level routine which recognizes one of keywords in the buffer.
2421: * Keyword Id
2422: * ID - i
2423: * IDREF - r
2424: * IDREFS - R
2425: * ENTITY - n
2426: * ENTITIES - N
2427: * NMTOKEN - t
2428: * NMTOKENS - T
2429: * ELEMENT - e
2430: * ATTLIST - a
2431: * NOTATION - o
2432: * CDATA - c
2433: * REQUIRED - Q
2434: * IMPLIED - I
2435: * FIXED - F
2436: *
2437: * @return an id of a keyword or '?'.
2438: * @exception SAXException When incorrect character appear in the name.
2439: * @exception IOException
2440: */
2441: private char bkeyword() throws SAXException, IOException {
2442: String str = new String(mBuff, 1, mBuffIdx);
2443: switch (str.length()) {
2444: case 2: // ID
2445: return ("ID".equals(str) == true) ? 'i' : '?';
2446:
2447: case 5: // IDREF, CDATA, FIXED
2448: switch (mBuff[1]) {
2449: case 'I':
2450: return ("IDREF".equals(str) == true) ? 'r' : '?';
2451: case 'C':
2452: return ("CDATA".equals(str) == true) ? 'c' : '?';
2453: case 'F':
2454: return ("FIXED".equals(str) == true) ? 'F' : '?';
2455: default:
2456: break;
2457: }
2458: break;
2459:
2460: case 6: // IDREFS, ENTITY
2461: switch (mBuff[1]) {
2462: case 'I':
2463: return ("IDREFS".equals(str) == true) ? 'R' : '?';
2464: case 'E':
2465: return ("ENTITY".equals(str) == true) ? 'n' : '?';
2466: default:
2467: break;
2468: }
2469: break;
2470:
2471: case 7: // NMTOKEN, IMPLIED, ATTLIST, ELEMENT
2472: switch (mBuff[1]) {
2473: case 'I':
2474: return ("IMPLIED".equals(str) == true) ? 'I' : '?';
2475: case 'N':
2476: return ("NMTOKEN".equals(str) == true) ? 't' : '?';
2477: case 'A':
2478: return ("ATTLIST".equals(str) == true) ? 'a' : '?';
2479: case 'E':
2480: return ("ELEMENT".equals(str) == true) ? 'e' : '?';
2481: default:
2482: break;
2483: }
2484: break;
2485:
2486: case 8: // ENTITIES, NMTOKENS, NOTATION, REQUIRED
2487: switch (mBuff[2]) {
2488: case 'N':
2489: return ("ENTITIES".equals(str) == true) ? 'N' : '?';
2490: case 'M':
2491: return ("NMTOKENS".equals(str) == true) ? 'T' : '?';
2492: case 'O':
2493: return ("NOTATION".equals(str) == true) ? 'o' : '?';
2494: case 'E':
2495: return ("REQUIRED".equals(str) == true) ? 'Q' : '?';
2496: default:
2497: break;
2498: }
2499: break;
2500:
2501: default:
2502: break;
2503: }
2504: return '?';
2505: }
2506:
2507: /**
* Reads a single or double quoted string into the buffer.
2509: *
2510: * This method resolves entities inside a string unless the parser
2511: * parses DTD.
2512: *
2513: * @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization;
2514: * '-' - not an attribute value; 'd' - in DTD context.
2515: * @exception SAXException
2516: * @exception IOException
2517: */
private void bqstr(char flag) throws SAXException, IOException {
    // Overview: skips white space up to the opening quote, then
    // accumulates characters in the buffer (from index 1) until the
    // matching quote is read from the same input the string started in.
    // States: 0 - looking for the opening quote; 2 - inside a single
    // quoted string; 3 - inside a double quoted string.
    Input inp = mInp; // remember the original input
    mBuffIdx = -1;
    bappend((char) 0); // default offset to the colon char
    char ch;
    for (short st = 0; st >= 0;) {
        ch = (mChIdx < mChLen) ? mChars[mChIdx++] : next();
        switch (st) {
        case 0: // read a single or double quote
            switch (ch) {
            case ' ':
            case '\n':
            case '\r':
            case '\t':
                // White space before the opening quote is skipped
                break;

            case '\'':
                st = 2; // read a single quoted string
                break;

            case '\"':
                st = 3; // read a double quoted string
                break;

            default:
                panic(FAULT);
                break;
            }
            break;

        case 2: // read a single quoted string
        case 3: // read a double quoted string
            switch (ch) {
            case '\'':
                // Closes the string only if it is the opening quote
                // kind and the current input is the one the string
                // started in; otherwise it is data.
                if ((st == 2) && (mInp == inp))
                    st = -1;
                else
                    bappend(ch);
                break;

            case '\"':
                // Same rule as for the single quote
                if ((st == 3) && (mInp == inp))
                    st = -1;
                else
                    bappend(ch);
                break;

            case '&':
                // References are passed to ent() except in DTD context
                // ('d'), where '&' is kept as is.
                if (flag != 'd')
                    ent(flag);
                else
                    bappend(ch);
                break;

            case '%':
                // Parameter entity references are handled only in DTD
                // context; '-' asks pent() for no surrounding spaces.
                if (flag == 'd')
                    pent('-');
                else
                    bappend(ch);
                break;

            case '<':
                // '<' is accepted only for flags '-' and 'd'
                if ((flag == '-') || (flag == 'd'))
                    bappend(ch);
                else
                    panic(FAULT);
                break;

            case EOS: // EOS before single/double quote
                // panic() always throws, so the fall-through below is
                // never taken.
                panic(FAULT);

            case '\r': // EOL processing [#2.11 & #3.3.3]
                // "\r\n" and a lone "\r" become "\n", unless no
                // normalization was requested (' ') or mInp.next is set.
                if (flag != ' ' && mInp.next == null) {
                    if (next() != '\n')
                        back();
                    ch = '\n';
                }
                // Intentional fall-through: append the character
            default:
                bappend(ch, flag);
                break;
            }
            break;

        default:
            panic(FAULT);
        }
    }
    // There is maximum one space at the end of the string in
    // i-mode (non CDATA normalization) and it has to be removed.
    if ((flag == 'i') && (mBuff[mBuffIdx] == ' '))
        mBuffIdx -= 1;
}
2610:
2611: /**
2612: * Reports characters and empties the parser's buffer.
2613: */
2614: private void bflash() throws SAXException {
2615: if (mBuffIdx >= 0) {
2616: // Textual data has been read
2617: mHand.characters(mBuff, 0, (mBuffIdx + 1));
2618: mBuffIdx = -1;
2619: }
2620: }
2621:
2622: /**
2623: * Appends a character to parser's buffer with normalization.
2624: *
2625: * @param ch The character to append to the buffer.
2626: * @param mode The normalization mode.
2627: */
2628: private void bappend(char ch, char mode) {
2629: // This implements attribute value normalization as
2630: // described in the XML specification [#3.3.3].
2631: switch (mode) {
2632: case 'i': // non CDATA normalization
2633: switch (ch) {
2634: case ' ':
2635: case '\n':
2636: case '\r':
2637: case '\t':
2638: if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' '))
2639: bappend(' ');
2640: return;
2641:
2642: default:
2643: break;
2644: }
2645: break;
2646:
2647: case 'c': // CDATA normalization
2648: switch (ch) {
2649: case '\n':
2650: case '\r':
2651: case '\t':
2652: ch = ' ';
2653: break;
2654:
2655: default:
2656: break;
2657: }
2658: break;
2659:
2660: default: // no normalization
2661: break;
2662: }
2663: mBuffIdx++;
2664: if (mBuffIdx < mBuff.length) {
2665: mBuff[mBuffIdx] = ch;
2666: } else {
2667: mBuffIdx--;
2668: bappend(ch);
2669: }
2670: }
2671:
2672: /**
2673: * Appends a character to parser's buffer.
2674: *
2675: * @param ch The character to append to the buffer.
2676: */
2677: private void bappend(char ch) {
2678: try {
2679: mBuff[++mBuffIdx] = ch;
2680: } catch (Exception exp) {
2681: // Double the buffer size
2682: char buff[] = new char[mBuff.length << 1];
2683: System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2684: mBuff = buff;
2685: mBuff[mBuffIdx] = ch;
2686: }
2687: }
2688:
2689: /**
2690: * Appends (mChIdx - cidx) characters from character buffer (mChars) to
2691: * parser's buffer (mBuff).
2692: *
2693: * @param cidx The character buffer (mChars) start index.
2694: * @param bidx The parser buffer (mBuff) start index.
2695: */
2696: private void bcopy(int cidx, int bidx) {
2697: int length = mChIdx - cidx;
2698: if ((bidx + length + 1) >= mBuff.length) {
2699: // Expand the buffer
2700: char buff[] = new char[mBuff.length + length];
2701: System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2702: mBuff = buff;
2703: }
2704: System.arraycopy(mChars, cidx, mBuff, bidx, length);
2705: mBuffIdx += length;
2706: }
2707:
2708: /**
2709: * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>,
2710: * <i>apos</i>, <i>quot</i>.
2711: * The initial state is 0x100. Any state belowe 0x100 is a built-in
2712: * entity replacement character.
2713: *
2714: * @param ch the next character of an entity name.
2715: */
2716: private void eappend(char ch) {
2717: switch (mESt) {
2718: case 0x100: // "l" or "g" or "a" or "q"
2719: switch (ch) {
2720: case 'l':
2721: mESt = 0x101;
2722: break;
2723: case 'g':
2724: mESt = 0x102;
2725: break;
2726: case 'a':
2727: mESt = 0x103;
2728: break;
2729: case 'q':
2730: mESt = 0x107;
2731: break;
2732: default:
2733: mESt = 0x200;
2734: break;
2735: }
2736: break;
2737: case 0x101: // "lt"
2738: mESt = (ch == 't') ? '<' : (char) 0x200;
2739: break;
2740: case 0x102: // "gt"
2741: mESt = (ch == 't') ? '>' : (char) 0x200;
2742: break;
2743: case 0x103: // "am" or "ap"
2744: switch (ch) {
2745: case 'm':
2746: mESt = 0x104;
2747: break;
2748: case 'p':
2749: mESt = 0x105;
2750: break;
2751: default:
2752: mESt = 0x200;
2753: break;
2754: }
2755: break;
2756: case 0x104: // "amp"
2757: mESt = (ch == 'p') ? '&' : (char) 0x200;
2758: break;
2759: case 0x105: // "apo"
2760: mESt = (ch == 'o') ? (char) 0x106 : (char) 0x200;
2761: break;
2762: case 0x106: // "apos"
2763: mESt = (ch == 's') ? '\'' : (char) 0x200;
2764: break;
2765: case 0x107: // "qu"
2766: mESt = (ch == 'u') ? (char) 0x108 : (char) 0x200;
2767: break;
2768: case 0x108: // "quo"
2769: mESt = (ch == 'o') ? (char) 0x109 : (char) 0x200;
2770: break;
2771: case 0x109: // "quot"
2772: mESt = (ch == 't') ? '\"' : (char) 0x200;
2773: break;
2774: case '<': // "lt"
2775: case '>': // "gt"
2776: case '&': // "amp"
2777: case '\'': // "apos"
2778: case '\"': // "quot"
2779: mESt = 0x200;
2780: default:
2781: break;
2782: }
2783: }
2784:
2785: /**
2786: * Sets up a new input source on the top of the input stack.
2787: * Note, the first byte returned by the entity's byte stream has to be the
2788: * first byte in the entity. However, the parser does not expect the byte
2789: * order mask in both cases when encoding is provided by the input source.
2790: *
2791: * @param is A new input source to set up.
2792: * @exception IOException If any IO errors occur.
2793: * @exception SAXException If the input source cannot be read.
2794: */
2795: private void setinp(InputSource is) throws SAXException,
2796: IOException {
2797: Reader reader = null;
2798: mChIdx = 0;
2799: mChLen = 0;
2800: mChars = mInp.chars;
2801: mInp.src = null;
2802: if (mSt == 0)
2803: mIsSAlone = false; // default [#2.9]
2804: if (is.getCharacterStream() != null) {
2805: // Ignore encoding in the xml text decl.
2806: reader = is.getCharacterStream();
2807: xml(reader);
2808: } else if (is.getByteStream() != null) {
2809: String expenc;
2810: if (is.getEncoding() != null) {
2811: // Ignore encoding in the xml text decl.
2812: expenc = is.getEncoding().toUpperCase();
2813: if (expenc.equals("UTF-16"))
2814: reader = bom(is.getByteStream(), 'U'); // UTF-16 [#4.3.3]
2815: else
2816: reader = enc(expenc, is.getByteStream());
2817: xml(reader);
2818: } else {
2819: // Get encoding from BOM or the xml text decl.
2820: reader = bom(is.getByteStream(), ' ');
2821: if (reader == null) {
2822: // Encoding is defined by the xml text decl.
2823: reader = enc("UTF-8", is.getByteStream());
2824: expenc = xml(reader);
2825: if (expenc.startsWith("UTF-16"))
2826: panic(FAULT); // UTF-16 must have BOM [#4.3.3]
2827: reader = enc(expenc, is.getByteStream());
2828: } else {
2829: // Encoding is defined by the BOM.
2830: xml(reader);
2831: }
2832: }
2833: } else {
2834: // There is no support for public/system identifiers.
2835: panic(FAULT);
2836: }
2837: mInp.src = reader;
2838: mInp.pubid = is.getPublicId();
2839: mInp.sysid = is.getSystemId();
2840: }
2841:
2842: /**
2843: * Determines the entity encoding.
2844: *
2845: * This method gets encoding from Byte Order Mask [#4.3.3] if any.
2846: * Note, the first byte returned by the entity's byte stream has
2847: * to be the first byte in the entity. Also, there is no support
2848: * for UCS-4.
2849: *
2850: * @param is A byte stream of the entity.
2851: * @param hint An encoding hint, character U means UTF-16.
2852: * @return a reader constructed from the BOM or UTF-8 by default.
2853: * @exception SAXException
2854: * @exception IOException
2855: */
2856: private Reader bom(InputStream is, char hint) throws SAXException,
2857: IOException {
2858: int val = is.read();
2859: switch (val) {
2860: case 0xef: // UTF-8
2861: if (hint == 'U') // must be UTF-16
2862: panic(FAULT);
2863: if (is.read() != 0xbb)
2864: panic(FAULT);
2865: if (is.read() != 0xbf)
2866: panic(FAULT);
2867: return new ReaderUTF8(is);
2868:
2869: case 0xfe: // UTF-16, big-endian
2870: if (is.read() != 0xff)
2871: panic(FAULT);
2872: return new ReaderUTF16(is, 'b');
2873:
2874: case 0xff: // UTF-16, little-endian
2875: if (is.read() != 0xfe)
2876: panic(FAULT);
2877: return new ReaderUTF16(is, 'l');
2878:
2879: case -1:
2880: mChars[mChIdx++] = EOS;
2881: return new ReaderUTF8(is);
2882:
2883: default:
2884: if (hint == 'U') // must be UTF-16
2885: panic(FAULT);
2886: // Read the rest of UTF-8 character
2887: switch (val & 0xf0) {
2888: case 0xc0:
2889: case 0xd0:
2890: mChars[mChIdx++] = (char) (((val & 0x1f) << 6) | (is
2891: .read() & 0x3f));
2892: break;
2893:
2894: case 0xe0:
2895: mChars[mChIdx++] = (char) (((val & 0x0f) << 12)
2896: | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
2897: break;
2898:
2899: case 0xf0: // UCS-4 character
2900: throw new UnsupportedEncodingException();
2901:
2902: default:
2903: mChars[mChIdx++] = (char) val;
2904: break;
2905: }
2906: return null;
2907: }
2908: }
2909:
/**
 * Parses the xml text declaration.
 *
 * This method gets encoding from the xml text declaration [#4.3.1] if any.
 * The method assumes the buffer (mChars) is big enough to accommodate the
 * whole xml text declaration.
 *
 * The work is done in two phases. First, characters are read from the
 * reader into mChars until it is clear that there is no declaration or
 * until the end of the declaration ("?&gt;") or of the stream is reached.
 * Second, if a declaration was found, its pseudo attributes are parsed
 * from the buffer starting at index 5. In both cases the characters read
 * stay in the buffer (mChLen is set to their count).
 *
 * @param reader is entity reader.
 * @return The xml text declaration encoding (upper case) or default
 *	UTF-8 encoding.
 * @exception SAXException
 * @exception IOException
 */
private String xml(Reader reader) throws SAXException, IOException {
    String str = null;
    String enc = "UTF-8";
    char ch;
    int val;
    short st;
    // Read the xml text declaration into the buffer
    if (mChIdx != 0) {
        // The bom method has read ONE char into the buffer.
        st = (short) ((mChars[0] == '<') ? 1 : -1);
    } else {
        st = 0;
    }
    // Phase one. States 0 - 5 match "<?xml" followed by a white space,
    // states 6 and 7 look for the closing "?>". State -1 means there is
    // no declaration, state -2 means its end (or EOS) has been read.
    // NOTE(review): the loop also stops when the buffer is full; st is
    // then still non-negative and phase two runs anyway.
    while (st >= 0 && mChIdx < mChars.length) {
        ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
        mChars[mChIdx++] = ch;
        switch (st) {
        case 0: // read '<' of xml declaration
            switch (ch) {
            case '<':
                st = 1;
                break;

            case 0xfeff: // the byte order mark
                // The mark is dropped: the next character
                // overwrites it in the buffer.
                ch = ((val = reader.read()) >= 0) ? (char) val
                        : EOS;
                mChars[mChIdx - 1] = ch;
                st = (short) ((ch == '<') ? 1 : -1);
                break;

            default:
                st = -1;
                break;
            }
            break;

        case 1: // read '?' of xml declaration [#4.3.1]
            st = (short) ((ch == '?') ? 2 : -1);
            break;

        case 2: // read 'x' of xml declaration [#4.3.1]
            st = (short) ((ch == 'x') ? 3 : -1);
            break;

        case 3: // read 'm' of xml declaration [#4.3.1]
            st = (short) ((ch == 'm') ? 4 : -1);
            break;

        case 4: // read 'l' of xml declaration [#4.3.1]
            st = (short) ((ch == 'l') ? 5 : -1);
            break;

        case 5: // read white space after 'xml'
            switch (ch) {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                st = 6;
                break;

            default:
                st = -1;
                break;
            }
            break;

        case 6: // read content of xml declaration
            switch (ch) {
            case '?':
                st = 7;
                break;

            case EOS:
                st = -2;
                break;

            default:
                break;
            }
            break;

        case 7: // read '>' after '?' of xml declaration
            switch (ch) {
            case '>':
            case EOS:
                st = -2;
                break;

            default:
                // a lone '?' inside the declaration content
                st = 6;
                break;
            }
            break;

        default:
            panic(FAULT);
            break;
        }
    }
    // Everything read so far becomes the content of the character buffer.
    mChLen = mChIdx;
    mChIdx = 0;
    // If there is no xml text declaration, the encoding is default.
    if (st == -1) {
        return enc;
    }
    mChIdx = 5; // the first white space after "<?xml"
    // Parse the xml text declaration
    // Phase two. State 1 expects 'version', state 2 'encoding' or
    // 'standalone', state 3 'standalone', state 4 the closing "?>".
    for (st = 0; st >= 0;) {
        ch = next();
        switch (st) {
        case 0: // skip spaces after the xml declaration name
            if (chtyp(ch) != ' ') {
                back();
                st = 1;
            }
            break;

        case 1: // read xml declaration version
        case 2: // read xml declaration encoding or standalone
        case 3: // read xml declaration standalone
            switch (chtyp(ch)) {
            case 'a':
            case 'A':
            case '_':
                // A pseudo attribute name starts here; it is
                // compared in lower case.
                back();
                str = name(false).toLowerCase();
                if ("version".equals(str) == true) {
                    // 'version' must come first and must be 1.0
                    if (st != 1)
                        panic(FAULT);
                    if ("1.0".equals(eqstr('=')) != true)
                        panic(FAULT);
                    st = 2;
                } else if ("encoding".equals(str) == true) {
                    // 'encoding' must directly follow 'version'
                    if (st != 2)
                        panic(FAULT);
                    enc = eqstr('=').toUpperCase();
                    st = 3;
                } else if ("standalone".equals(str) == true) {
                    // 'standalone' cannot come first and is accepted
                    // only while mSt is 0
                    if ((st == 1) || (mSt != 0)) // [#4.3.1]
                        panic(FAULT);
                    str = eqstr('=').toLowerCase();
                    // Check the 'standalone' value and use it
                    if (str.equals("yes") == true) {
                        mIsSAlone = true;
                    } else if (str.equals("no") == true) {
                        mIsSAlone = false;
                    } else {
                        panic(FAULT);
                    }
                    st = 4;
                } else {
                    panic(FAULT);
                }
                break;

            case ' ':
                break;

            case '?':
                // The declaration ends here; 'version' is mandatory.
                if (st == 1)
                    panic(FAULT);
                back();
                st = 4;
                break;

            default:
                panic(FAULT);
            }
            break;

        case 4: // end of xml declaration
            switch (chtyp(ch)) {
            case '?':
                if (next() != '>')
                    panic(FAULT);
                if (mSt == 0) // the beginning of the document
                    mSt = 1; // misc before DTD
                st = -1;
                break;

            case ' ':
                break;

            default:
                panic(FAULT);
            }
            break;

        default:
            panic(FAULT);
        }
    }
    return enc;
}
3117:
3118: /**
3119: * Sets up the document reader.
3120: *
3121: * @param name an encoding name.
3122: * @param is the document byte input stream.
3123: * @return a reader constructed from encoding name and input stream.
3124: * @exception UnsupportedEncodingException
3125: */
3126: private Reader enc(String name, InputStream is)
3127: throws java.io.UnsupportedEncodingException {
3128: // DO NOT CLOSE current reader if any!
3129: if (name.equals("UTF-8"))
3130: return new ReaderUTF8(is);
3131: else if (name.equals("UTF-16LE"))
3132: return new ReaderUTF16(is, 'l');
3133: else if (name.equals("UTF-16BE"))
3134: return new ReaderUTF16(is, 'b');
3135: else
3136: return new InputStreamReader(is, name);
3137: }
3138:
3139: /**
3140: * Sets up current input on the top of the input stack.
3141: *
3142: * @param inp A new input to set up.
3143: */
3144: private void push(Input inp) {
3145: mInp.chLen = mChLen;
3146: mInp.chIdx = mChIdx;
3147: inp.next = mInp;
3148: mInp = inp;
3149: mChars = inp.chars;
3150: mChLen = inp.chLen;
3151: mChIdx = inp.chIdx;
3152: }
3153:
3154: /**
3155: * Restores previous input on the top of the input stack.
3156: */
3157: private void pop() {
3158: if (mInp.src != null) {
3159: try {
3160: mInp.src.close();
3161: } catch (IOException ioe) {
3162: }
3163: mInp.src = null;
3164: }
3165: mInp = mInp.next;
3166: if (mInp != null) {
3167: mChars = mInp.chars;
3168: mChLen = mInp.chLen;
3169: mChIdx = mInp.chIdx;
3170: } else {
3171: mChars = null;
3172: mChLen = 0;
3173: mChIdx = 0;
3174: }
3175: }
3176:
3177: /**
3178: * Maps a character to it's type.
3179: *
3180: * Possible character type values are:<br />
3181: * - ' ' for any kind of white space character;<br />
3182: * - 'a' for any lower case alphabetical character value;<br />
3183: * - 'A' for any upper case alphabetical character value;<br />
3184: * - 'd' for any decimal digit character value;<br />
3185: * - 'z' for any character less then ' ' except
3186: * '\t', '\n', '\r';<br />
3187: * - 'X' for any not ASCII character;<br />
3188: * - 'Z' for EOS character.<br />
3189: * An ASCII (7 bit) character which does not fall in any category listed
3190: * above is mapped to it self.
3191: *
3192: * @param ch The character to map.
3193: * @return The type of character.
3194: * @exception SAXException When End Of Stream character typed.
3195: */
3196: private char chtyp(char ch) throws SAXException {
3197: if (ch < 0x80)
3198: return (char) asctyp[ch];
3199: return (ch != EOS) ? 'X' : 'Z';
3200: }
3201:
3202: /**
3203: * Retrives the next character in the document.
3204: *
3205: * @return The next character in the document.
3206: */
3207: private char next() throws java.io.IOException {
3208: if (mChIdx >= mChLen) {
3209: if (mInp.src == null) {
3210: pop(); // remove internal entity
3211: return next();
3212: }
3213: // Read new portion of the document characters
3214: int Num = mInp.src.read(mChars, 0, mChars.length);
3215: if (Num < 0) {
3216: if (mInp != mDoc) {
3217: pop(); // restore the previous input
3218: return next();
3219: } else {
3220: mChars[0] = EOS;
3221: mChLen = 1;
3222: }
3223: } else
3224: mChLen = Num;
3225: mChIdx = 0;
3226: }
3227: return mChars[mChIdx++];
3228: }
3229:
3230: /**
3231: * Puts back the last read character.
3232: *
3233: * This method <strong>MUST NOT</strong> be called more then once after
3234: * each call of {@link #next next} method.
3235: */
3236: private void back() throws SAXException {
3237: if (mChIdx <= 0)
3238: panic(FAULT);
3239: mChIdx--;
3240: }
3241:
3242: /**
3243: * Sets the current character.
3244: *
3245: * @param ch The character to set.
3246: */
3247: private void setch(char ch) {
3248: mChars[mChIdx] = ch;
3249: }
3250:
3251: /**
3252: * Finds a pair in the pair chain by a qualified name.
3253: *
3254: * @param chain The first element of the chain of pairs.
3255: * @param qname The qualified name.
3256: * @return A pair with the specified qualified name or null.
3257: */
3258: private Pair find(Pair chain, char[] qname) {
3259: for (Pair pair = chain; pair != null; pair = pair.next) {
3260: if (pair.eqname(qname) == true)
3261: return pair;
3262: }
3263: return null;
3264: }
3265:
3266: /**
3267: * Provedes an instance of a pair.
3268: *
3269: * @param next The reference to a next pair.
3270: * @return An instance of a pair.
3271: */
3272: private Pair pair(Pair next) {
3273: Pair pair;
3274:
3275: if (mDltd != null) {
3276: pair = mDltd;
3277: mDltd = pair.next;
3278: } else {
3279: pair = new Pair();
3280: }
3281: pair.next = next;
3282:
3283: return pair;
3284: }
3285:
3286: /**
3287: * Deletes an instance of a pair.
3288: *
3289: * @param pair The pair to delete.
3290: * @return A reference to the next pair in a chain.
3291: */
3292: private Pair del(Pair pair) {
3293: Pair next = pair.next;
3294:
3295: pair.name = null;
3296: pair.value = null;
3297: pair.chars = null;
3298: pair.list = null;
3299: pair.next = mDltd;
3300: mDltd = pair;
3301:
3302: return next;
3303: }
3304: }
|