0001: /*
0002: * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
0003: * Copyright (C) 2006 - Javolution (http://javolution.org/)
0004: * All rights reserved.
0005: *
0006: * Permission to use, copy, modify, and distribute this software is
0007: * freely granted, provided that this notice is preserved.
0008: */
0009: package javolution.xml.stream;
0010:
0011: import java.io.IOException;
0012: import java.io.InputStream;
0013: import java.io.InputStreamReader;
0014: import java.io.Reader;
0015: import java.io.UnsupportedEncodingException;
0016:
0017: import javolution.context.ObjectFactory;
0018: import javolution.io.UTF8StreamReader;
0019: import javolution.lang.Reusable;
0020: import javolution.text.CharArray;
0021: import javolution.xml.sax.Attributes;
0022: import j2me.lang.CharSequence;
0023: import j2me.lang.IllegalStateException;
0024: import j2me.util.Map;
0025: import j2mex.realtime.MemoryArea;
0026:
0027: /**
0028: * <p> This class represents a {@link javolution.lang.Reusable reusable}
0029: * implementation of {@link XMLStreamReader}.</p>
0030: *
0031: * <p> Except for the types being used ({@link CharArray CharArray}/
0032: * {@link CharSequence CharSequence} instead of {@link String}) the
0033: * parsing behavior is about the same as for the standard
0034: * <code>javax.xml.stream.XMLStreamReader</code> (although several times
0035: * faster).</p>
0036: *
0037: * <p> The {@link CharArray CharArray} instances returned by this reader
0038: * support fast primitive conversions as illustrated below:[code]
0039: *
0040: * // Creates reader for an input stream with unknown encoding.
0041: * XMLStreamReaderImpl xmlReader = new XMLStreamReaderImpl(); xmlReader.setInput(inputStream);
0042: *
0043: * // Parses.
0044: * for (int e=xmlReader.next(); e != XMLStreamConstants.END_DOCUMENT; e = xmlReader.next()) {
0045: * switch (e) { // Event.
0046: * case XMLStreamConstants.START_ELEMENT:
0047: * if (xmlReader.getLocalName().equals("Time")) {
0048: * // Reads primitive types (int) attributes directly.
0049: * int hour = xmlReader.getAttributeValue("hour").toInt();
0050: * int minute = xmlReader.getAttributeValue("minute").toInt();
0051: * int second = xmlReader.getAttributeValue("second").toInt();
0052: * ...
0053: * }
0054: * ...
0055: * break;
0056: * }
0057: * }
0058: *
0059: * // Closes reader, it is automatically reset() and can be reused!
0060: * xmlReader.close();
0061: * [/code]</p>
0062: *
0063: * <p> This reader returns all contiguous character data in a single
0064: * chunk (always coalescing). It is non-validating (DTD is returned
0065: * unparsed). Although, users may define custom entities mapping using
0066: * the {@link #setEntities} method (e.g. after parsing/resolving
0067: * external entities).</p>
0068: *
0069: * @author <a href="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
0070: * @version 4.0, September 4, 2006
0071: */
0072: public final class XMLStreamReaderImpl implements XMLStreamReader,
0073: Reusable {
0074:
0075: /**
0076: * Holds the textual representation for events (indexed by event type
* code, as done by {@link #toString()}; index 0 is the "UNDEFINED" entry).
0077: */
0078: static final String[] NAMES_OF_EVENTS = new String[] { "UNDEFINED",
0079: "START_ELEMENT", "END_ELEMENT", "PROCESSING_INSTRUCTIONS",
0080: "CHARACTERS", "COMMENT", "SPACE", "START_DOCUMENT",
0081: "END_DOCUMENT", "ENTITY_REFERENCE", "ATTRIBUTE", "DTD",
0082: "CDATA", "NAMESPACE", "NOTATION_DECLARATION",
0083: "ENTITY_DECLARATION" };
0084:
0085: /**
0086: * Holds the reader buffer capacity (size of the read buffer; the data
* buffer starts at twice this size).
0087: */
0088: static final int READER_BUFFER_CAPACITY = 4096;
0089:
0090: /**
0091: * Holds the prolog if any (set by {@link #setInput(Reader)} when the
* input starts with an XML declaration; <code>null</code> otherwise).
0092: */
0093: CharArray _prolog;
0094:
0095: /**
0096: * Holds object factory when factory-produced.
0097: */
0098: ObjectFactory _objectFactory;
0099:
0100: /**
0101: * Holds the index of the next character to consume in the read buffer
* (<code>_readBuffer</code>).
0102: */
0103: private int _readIndex;
0104:
0105: /**
0106: * Holds the number of characters currently available in the read buffer
* (non-positive once the underlying reader reports no more data).
0107: */
0108: private int _readCount;
0109:
0110: /**
0111: * Holds the data buffer for CharSequence produced by this parser.
0112: */
0113: private char[] _data = new char[READER_BUFFER_CAPACITY * 2];
0114:
0115: /**
0116: * Holds the current index of the data buffer (_data), i.e. the position
* where the next character is written.
0117: */
0118: private int _index;
0119:
0120: /**
0121: * Holds the current element nesting.
0122: */
0123: private int _depth;
0124:
0125: /**
0126: * Holds qualified name (include prefix).
0127: */
0128: private CharArray _qName;
0129:
0130: /**
0131: * Holds element prefix separator index (index of ':' within _data,
* or -1 when the element name has no prefix).
0132: */
0133: private int _prefixSep;
0134:
0135: /**
0136: * Holds attribute qualified name.
0137: */
0138: private CharArray _attrQName;
0139:
0140: /**
0141: * Holds attribute prefix separator index (index of ':' within _data,
* or -1 when the attribute name has no prefix).
0142: */
0143: private int _attrPrefixSep;
0144:
0145: /**
0146: * Holds attribute value.
0147: */
0148: private CharArray _attrValue;
0149:
0150: /**
0151: * Holds current event type
0152: */
0153: private int _eventType = START_DOCUMENT;
0154:
0155: /**
0156: * Indicates if event type is START_TAG, and tag is empty, i.e. <sometag/>
* (the matching END_ELEMENT is then generated by the following next() call).
0157: */
0158: private boolean _isEmpty;
0159:
0160: /**
0161: * Indicates if characters are pending for potential coalescing.
0162: */
0163: boolean _charactersPending = false;
0164:
0165: /**
0166: * Holds the start index for the current state within _data array.
0167: */
0168: private int _start;
0169:
0170: /**
0171: * Holds the parser state (one of the STATE_* constants).
0172: */
0173: private int _state = STATE_CHARACTERS;
0174:
0175: /**
0176: * Holds the current text.
0177: */
0178: private CharArray _text;
0179:
0180: /**
0181: * Holds the reader input source (<code>null</code> when unused).
0182: */
0183: private Reader _reader;
0184:
0185: /**
0186: * Holds the character buffer used for reading.
0187: */
0188: private final char[] _readBuffer = new char[READER_BUFFER_CAPACITY];
0189:
0190: /**
0191: * Holds the start offset in the character buffer (due to auto detection
0192: * of encoding). Characters below this offset are kept when the reader
* is replaced by {@link #setInput(InputStream)}.
0193: */
0194: private int _startOffset; // Byte Order Mark count.
0195:
0196: /**
0197: * Holds the location object.
0198: */
0199: private final LocationImpl _location = new LocationImpl();
0200:
0201: /**
0202: * Holds the namespace stack.
0203: */
0204: private final NamespacesImpl _namespaces = new NamespacesImpl();
0205:
0206: /**
0207: * Holds the current attributes.
0208: */
0209: private final AttributesImpl _attributes = new AttributesImpl(
0210: _namespaces);
0211:
0212: /**
0213: * Holds working stack (by nesting level); entry <code>_depth</code> is
* the qualified name of the innermost open element.
0214: */
0215: private CharArray[] _elemStack = new CharArray[16];
0216:
0217: /**
0218: * Holds stream encoding if known.
0219: */
0220: private String _encoding;
0221:
0222: /**
0223: * Holds the entities.
0224: */
0225: private final EntitiesImpl _entities = new EntitiesImpl();
0226:
0227: /**
0228: * Holds the reader for input streams (used for UTF-8 encoded streams).
0229: */
0230: private final UTF8StreamReader _utf8StreamReader = new UTF8StreamReader();
0231:
0232: /**
0233: * Default constructor (no input is attached until one of the
* <code>setInput</code> methods is called).
0234: */
0235: public XMLStreamReaderImpl() {
0236: }
0237:
0238: /**
0239: * Sets the input stream source for this XML stream reader
0240: * (encoding retrieved from XML prolog if any).
0241: *
0242: * @param in the input source with unknown encoding.
0243: */
0244: public void setInput(InputStream in) throws XMLStreamException {
0245: setInput(in, detectEncoding(in));
0246: CharArray prologEncoding = getCharacterEncodingScheme();
0247:
0248: // Checks if necessary to change the reader.
0249: if ((prologEncoding != null)
0250: && !prologEncoding.equals(_encoding)
0251: && !(isUTF8(prologEncoding) && isUTF8(_encoding))) {
0252: // Changes reader (keep characters already read).
0253: int startOffset = _readCount;
0254: reset();
0255: _startOffset = startOffset;
0256: setInput(in, prologEncoding.toString());
0257: }
0258: }
0259:
0260: private static boolean isUTF8(Object encoding) {
0261: return encoding.equals("utf-8") || encoding.equals("UTF-8")
0262: || encoding.equals("ASCII") || encoding.equals("utf8")
0263: || encoding.equals("UTF8");
0264: }
0265:
0266: /**
0267: * Sets the input stream source and encoding for this XML stream reader.
0268: *
0269: * @param in the input source.
0270: * @param encoding the associated encoding.
0271: */
0272: public void setInput(InputStream in, String encoding)
0273: throws XMLStreamException {
0274: _encoding = encoding;
0275: if (isUTF8(encoding)) { // Use our fast UTF-8 Reader.
0276: setInput(_utf8StreamReader.setInput(in));
0277: } else {
0278: try {
0279: setInput(new InputStreamReader(in, encoding));
0280: } catch (UnsupportedEncodingException e) {
0281: throw new XMLStreamException(e);
0282: }
0283: }
0284: }
0285:
0286: /**
0287: * Sets the reader input source for this XML stream reader.
0288: * This method reads the prolog (if any).
0289: *
0290: * @param reader the input source reader.
0291: * @see javolution.io.UTF8StreamReader
0292: * @see javolution.io.UTF8ByteBufferReader
0293: * @see javolution.io.CharSequenceReader
0294: */
0295: public void setInput(Reader reader) throws XMLStreamException {
0296: if (_reader != null)
0297: throw new IllegalStateException(
0298: "Reader not closed or reset");
0299: _reader = reader;
0300: try { // Reads prolog (if there)
0301: int readCount = reader.read(_readBuffer, _startOffset,
0302: _readBuffer.length - _startOffset);
0303: _readCount = (readCount >= 0) ? readCount + _startOffset
0304: : _startOffset;
0305: if ((_readCount >= 5) && (_readBuffer[0] == '<')
0306: && (_readBuffer[1] == '?')
0307: && (_readBuffer[2] == 'x')
0308: && (_readBuffer[3] == 'm')
0309: && (_readBuffer[4] == 'l')
0310: && (_readBuffer[5] == ' ')) { // Prolog detected.
0311: next(); // Processing instruction.
0312: _prolog = this .getPIData();
0313: _index = _prolog.offset() + _prolog.length(); // Keep prolog.
0314: _start = _index; // Default state.
0315: _eventType = START_DOCUMENT; // Resets to START_DOCUMENT.
0316: }
0317: } catch (IOException e) {
0318: throw new XMLStreamException(e);
0319: }
0320: }
0321:
0322: /**
0323: * Returns the current depth of the element. Outside the root element,
0324: * the depth is 0. The depth is incremented by 1 when a start tag is
0325: * reached. The depth is decremented AFTER the end tag event was observed
* (the decrement is performed at the beginning of the following
* {@link #next()} call).
0326: * [code]
0327: * <!-- outside --> 0
0328: * <root> 1
0329: * sometext 1
0330: * <foobar> 2
0331: * </foobar> 2
0332: * </root> 1
0333: * <!-- outside --> 0 [/code]
0334: *
0335: * @return the nesting depth.
0336: */
0337: public int getDepth() {
0338: return _depth;
0339: }
0340:
0341: /**
0342: * Returns the qualified name of the current event.
0343: *
0344: * @return the qualified name.
0345: * @throws IllegalStateException if this not a START_ELEMENT or END_ELEMENT.
0346: */
0347: public CharArray getQName() {
0348: if ((_eventType != XMLStreamConstants.START_ELEMENT)
0349: && (_eventType != XMLStreamConstants.END_ELEMENT))
0350: throw new IllegalStateException(
0351: "Not a start element or an end element");
0352: return _qName;
0353: }
0354:
0355: /**
0356: * Returns the current attributes (SAX2-Like).
0357: *
0358: * @return returns the number of attributes.
0359: * @throws IllegalStateException if not a START_ELEMENT.
0360: */
0361: public Attributes getAttributes() {
0362: if (_eventType != XMLStreamConstants.START_ELEMENT)
0363: throw new IllegalStateException("Not a start element");
0364: return _attributes;
0365: }
0366:
0367: /**
0368: * Defines a custom entities to replacement text mapping for this reader.
0369: * For example:[code]
0370: * FastMap<String, String> HTML_ENTITIES = new FastMap<String, String>();
0371: * HTML_ENTITIES.put("nbsp", " ");
0372: * HTML_ENTITIES.put("copy", "©");
0373: * HTML_ENTITIES.put("eacute", "é");
0374: * ...
0375: * XMLStreamReaderImpl reader = new XMLStreamReaderImpl();
0376: * reader.setEntities(HTML_ENTITIES);
0377: * [/code]
0378: * The entities mapping may be changed dynamically (e.g.
0379: * after reading the DTD and all external entities references are resolved).
* The map is handed as-is to the internal entities holder, which is
* itself reset by {@link #reset()}.
0380: *
0381: * @param entities the entities to replacement texts mapping
0382: * (both must be <code>CharSequence</code> instances).
0383: */
0384: public void setEntities(Map entities) {
0385: _entities.setEntitiesMapping(entities);
0386: }
0387:
0388: /**
0389: * Returns the textual representation of this reader current state.
0390: *
0391: * @return the textual representation of the current state.
0392: */
0393: public String toString() {
0394: return "XMLStreamReader - State: "
0395: + NAMES_OF_EVENTS[_eventType] + ", Location: "
0396: + _location.toString();
0397: }
0398:
0399: // Implements XMLStreamReader Interface.
/**
 * Advances the parser to the next event and returns its type code.
 * The method is a character-driven state machine: each character is copied
 * from the read buffer into the data buffer (_data) and dispatched on the
 * current parsing state (_state). The events produced here are
 * START_ELEMENT, END_ELEMENT, CHARACTERS, COMMENT, PROCESSING_INSTRUCTION,
 * DTD and END_DOCUMENT. Contiguous text and CDATA sections are coalesced
 * into a single CHARACTERS event, which is flushed when the next tag,
 * comment or processing instruction starts.
 *
 * @return the type code of the event the reader is now positioned on.
 * @throws XMLStreamException if the markup is malformed, the document
 *         ends unexpectedly or the underlying reader fails.
 */
0400: public int next() throws XMLStreamException {
0401:
0402: // Clears previous state.
0403: if (_eventType == START_ELEMENT) {
0404: _attributes.reset();
0405: if (_isEmpty) { // Previous empty tag, generates END_TAG automatically.
0406: _isEmpty = false;
0407: return _eventType = END_ELEMENT;
0408: }
0409: } else if (_eventType == END_ELEMENT) {
// Leaves the element just closed: its namespace scope is popped and
// the data buffer is rewound to where its start tag name began.
0410: _namespaces.pop();
0411: CharArray startElem = _elemStack[_depth--];
0412: _start = _index = startElem.offset();
// Rewinds the pool index down to the slot holding the start tag name,
// so every sequence handed out since then can be reused.
0413: while (_seqs[--_seqsIndex] != startElem) { // Recycles CharArray instances.
0414: }
0415: }
0416: // Reader loop.
0417: while (true) {
0418:
0419: // Main character reading block.
0420: if ((_readIndex >= _readCount) && isEndOfStream())
0421: return _eventType; // END_DOCUMENT or CHARACTERS.
0422: char c = _readBuffer[_readIndex++];
// Only characters <= '&' can be '&' or a control character, so a
// single comparison filters out the common case.
0423: if (c <= '&')
0424: c = (c == '&') ? replaceEntity()
0425: : (c < ' ') ? handleEndOfLine(c) : c;
0426: _data[_index++] = c;
0427:
0428: // Main processing.
0429: //
0430: switch (_state) {
0431:
0432: case STATE_CHARACTERS:
0433: while (true) { // Read characters data all at once.
0434:
0435: if (c == '<') {
// Text accumulated so far (the '<' just written is excluded).
0436: int length = _index - _start - 1;
0437: if (length > 0) {
0438: if (_charactersPending) {
0439: _text.setArray(_data, _text.offset(),
0440: _text.length() + length); // Coalescing.
0441: } else {
0442: _text = newSeq(_start, length);
0443: _charactersPending = true;
0444: }
0445: _start = _index - 1; // Keeps '<' as part of markup.
0446: }
0447: _state = STATE_MARKUP;
0448: break;
0449: }
0450:
0451: // Local character reading block.
0452: if ((_readIndex >= _readCount) && isEndOfStream())
0453: return _eventType;
0454: c = _readBuffer[_readIndex++];
0455: if (c <= '&')
0456: c = (c == '&') ? replaceEntity()
0457: : (c < ' ') ? handleEndOfLine(c) : c;
0458: _data[_index++] = c;
0459: }
0460: break;
0461:
0462: case STATE_CDATA:
0463: while (true) { // Reads CDATA all at once.
0464:
// End of section: "]]>" (the '>' just written plus two ']').
0465: if ((c == '>') && (_index - _start >= 3)
0466: && (_data[_index - 2] == ']')
0467: && (_data[_index - 3] == ']')) {
0468: _index -= 3;
0469: int length = _index - _start;
0470: if (length > 0) { // Not empty.
0471: if (_charactersPending) {
0472: _text.setArray(_data, _text.offset(),
0473: _text.length() + length); // Coalescing.
0474: } else {
0475: _text = newSeq(_start, length);
0476: _charactersPending = true;
0477: }
0478: }
0479: _start = _index;
0480: _state = STATE_CHARACTERS;
0481: break;
0482: }
0483:
0484: // Local character reading block.
0485: if (_readIndex >= _readCount)
0486: reloadBuffer();
0487: c = _readBuffer[_readIndex++];
0488: if (c < ' ')
0489: c = handleEndOfLine(c);
0490: _data[_index++] = c;
0491: }
0492: break;
0493:
0494: case STATE_DTD:
0495: if (c == '>') {
0496: _text = newSeq(_start, _index - _start);
0497: _index = _start; // Do not keep DTD.
0498: _state = STATE_CHARACTERS;
0499: return _eventType = DTD;
0500: } else if (c == '[') {
0501: _state = STATE_DTD_INTERNAL;
0502: }
0503: break;
0504:
0505: case STATE_DTD_INTERNAL:
0506: if (c == ']') {
0507: _state = STATE_DTD;
0508: }
0509: break;
0510:
0511: case STATE_MARKUP: // Starts with '<'
// The kind of markup is decided from the characters following '<'
// (_index - _start is the number of markup characters read so far).
0512: if (_index - _start == 2) {
0513: if (c == '/') {
0514: _start = _index = _index - 2;
0515: _state = STATE_CLOSE_TAGxREAD_ELEM_NAME;
0516: _prefixSep = -1;
0517: if (_charactersPending) { // Flush characters event.
0518: _charactersPending = false;
0519: return _eventType = CHARACTERS;
0520: }
0521: } else if (c == '?') {
0522: _start = _index = _index - 2;
0523: _state = STATE_PI;
0524: if (_charactersPending) { // Flush characters event.
0525: _charactersPending = false;
0526: return _eventType = CHARACTERS;
0527: }
0528: } else if (c != '!') { // Element tag (first letter).
// Overwrites the '<' with the first name character.
0529: _data[_start] = c;
0530: _index = _start + 1;
0531: _state = STATE_OPEN_TAGxREAD_ELEM_NAME;
0532: _prefixSep = -1;
0533: if (_charactersPending) { // Flush character event.
0534: _charactersPending = false;
0535: return _eventType = CHARACTERS;
0536: }
0537: }
0538: } else if ((_index - _start == 4)
0539: && (_data[_start + 1] == '!')
0540: && (_data[_start + 2] == '-')
0541: && (_data[_start + 3] == '-')) {
0542: _start = _index = _index - 4; // Removes <!--
0543: _state = STATE_COMMENT;
0544: if (_charactersPending) { // Flush character event.
0545: _charactersPending = false;
0546: return _eventType = CHARACTERS;
0547: }
0548:
0549: } else if ((_index - _start == 9)
0550: && (_data[_start + 1] == '!')
0551: && (_data[_start + 2] == '[')
0552: && (_data[_start + 3] == 'C')
0553: && (_data[_start + 4] == 'D')
0554: && (_data[_start + 5] == 'A')
0555: && (_data[_start + 6] == 'T')
0556: && (_data[_start + 7] == 'A')
0557: && (_data[_start + 8] == '[')) {
// Pending characters are not flushed here: CDATA content is
// coalesced with the surrounding text.
0558: _start = _index = _index - 9; // Do not keep <![CDATA[
0559: _state = STATE_CDATA;
0560:
0561: } else if ((_index - _start == 9)
0562: && (_data[_start + 1] == '!')
0563: && (_data[_start + 2] == 'D')
0564: && (_data[_start + 3] == 'O')
0565: && (_data[_start + 4] == 'C')
0566: && (_data[_start + 5] == 'T')
0567: && (_data[_start + 6] == 'Y')
0568: && (_data[_start + 7] == 'P')
0569: && (_data[_start + 8] == 'E')) {
0570: // Keeps <!DOCTYPE as part of DTD.
0571: _state = STATE_DTD;
0572: } else {
0573: // Ignores, e.g. <!ELEMENT <!ENTITY...
0574: }
0575: break;
0576:
0577: case STATE_COMMENT:
0578: while (true) { // Read comment all at once.
0579:
0580: if ((c == '>') && (_index - _start >= 3)
0581: && (_data[_index - 2] == '-')
0582: && (_data[_index - 3] == '-')) {
0583: _index -= 3; // Removes -->
0584: _text = newSeq(_start, _index - _start);
0585: _state = STATE_CHARACTERS;
0586: _index = _start; // Do not keep comments.
0587: return _eventType = COMMENT;
0588: }
0589:
0590: // Local character reading block.
0591: if (_readIndex >= _readCount)
0592: reloadBuffer();
0593: c = _readBuffer[_readIndex++];
0594: if (c < ' ')
0595: c = handleEndOfLine(c);
0596: _data[_index++] = c;
0597: }
// No break: the loop above is only left through the return statement.
0598:
0599: case STATE_PI:
0600: if ((c == '>') && (_index - _start >= 2)
0601: && (_data[_index - 2] == '?')) {
0602: _index -= 2; // Removes ?>
0603: _text = newSeq(_start, _index - _start);
0604: _state = STATE_CHARACTERS;
0605: _index = _start; // Do not keep processing instructions.
0606: return _eventType = PROCESSING_INSTRUCTION;
0607: }
0608: break;
0609:
0610: // OPEN_TAG:
0611: case STATE_OPEN_TAGxREAD_ELEM_NAME:
0612: while (true) { // Read element name all at once.
0613:
0614: if (c < '@') { // Else avoid multiple checks.
0615: if (c == '>') {
0616: _qName = newSeq(_start, --_index - _start);
0617: _start = _index;
0618: _state = STATE_CHARACTERS;
0619: processStartTag();
0620: _isEmpty = false;
0621: return _eventType = START_ELEMENT;
0622: } else if (c == '/') {
0623: _qName = newSeq(_start, --_index - _start);
0624: _start = _index;
0625: _state = STATE_OPEN_TAGxEMPTY_TAG;
0626: break;
0627: } else if (c == ':') {
0628: _prefixSep = _index - 1;
0629: } else if (c <= ' ') {
0630: _qName = newSeq(_start, --_index - _start);
0631: _state = STATE_OPEN_TAGxELEM_NAME_READ;
0632: break;
0633: }
0634: }
0635:
0636: if (_readIndex >= _readCount)
0637: reloadBuffer();
0638: c = _data[_index++] = _readBuffer[_readIndex++];
0639: }
0640: break;
0641:
0642: case STATE_OPEN_TAGxELEM_NAME_READ:
0643: if (c == '>') {
0644: _start = --_index;
0645: _state = STATE_CHARACTERS;
0646: processStartTag();
0647: _isEmpty = false;
0648: return _eventType = START_ELEMENT;
0649: } else if (c == '/') {
0650: _state = STATE_OPEN_TAGxEMPTY_TAG;
0651: } else if (c > ' ') {
0652: _start = _index - 1; // Includes current character.
0653: _attrPrefixSep = -1;
0654: _state = STATE_OPEN_TAGxREAD_ATTR_NAME;
0655: }
0656: break;
0657:
0658: case STATE_OPEN_TAGxREAD_ATTR_NAME:
0659: while (true) { // Read attribute name all at once.
0660:
0661: if (c < '@') { // Else avoid multiple checks.
0662: if (c <= ' ') {
0663: _attrQName = newSeq(_start, --_index
0664: - _start);
0665: _state = STATE_OPEN_TAGxATTR_NAME_READ;
0666: break;
0667: } else if (c == '=') {
0668: _attrQName = newSeq(_start, --_index
0669: - _start);
0670: _state = STATE_OPEN_TAGxEQUAL_READ;
0671: break;
0672: } else if (c == ':') {
0673: _attrPrefixSep = _index - 1;
0674: }
0675: }
0676:
0677: if (_readIndex >= _readCount)
0678: reloadBuffer();
0679: _data[_index++] = c = _readBuffer[_readIndex++];
0680: }
0681: break;
0682:
0683: case STATE_OPEN_TAGxATTR_NAME_READ:
0684: if (c == '=') {
0685: --_index;
0686: _state = STATE_OPEN_TAGxEQUAL_READ;
0687: } else if (c > ' ') {
0688: throw new XMLStreamException("'=' expected",
0689: _location);
0690: }
0691: break;
0692:
0693: case STATE_OPEN_TAGxEQUAL_READ:
0694: if (c == '\'') {
0695: _start = --_index;
0696: _state = STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE;
0697: } else if (c == '\"') {
0698: _start = --_index;
0699: _state = STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE;
0700: } else if (c > ' ') {
0701: throw new XMLStreamException("Quotes expected",
0702: _location);
0703: }
0704: break;
0705:
0706: case STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE:
0707: while (true) { // Read attribute value all at once.
0708:
0709: if (c == '\'') {
0710: _attrValue = newSeq(_start, --_index - _start);
0711: processAttribute();
0712: _state = STATE_OPEN_TAGxELEM_NAME_READ;
0713: break;
0714: }
0715:
0716: // Local character reading block.
0717: if (_readIndex >= _readCount)
0718: reloadBuffer();
0719: c = _readBuffer[_readIndex++];
0720: if (c == '&')
0721: c = replaceEntity();
0722: _data[_index++] = c;
0723: }
0724: break;
0725:
0726: case STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE:
0727: while (true) { // Read attribute value all at once.
0728:
0729: if (c == '\"') {
0730: _attrValue = newSeq(_start, --_index - _start);
0731: processAttribute();
0732: _state = STATE_OPEN_TAGxELEM_NAME_READ;
0733: break;
0734: }
0735:
0736: // Local character reading block.
0737: if (_readIndex >= _readCount)
0738: reloadBuffer();
0739: c = _readBuffer[_readIndex++];
0740: if (c == '&')
0741: c = replaceEntity();
0742: _data[_index++] = c;
0743: }
0744: break;
0745:
0746: case STATE_OPEN_TAGxEMPTY_TAG:
0747: if (c == '>') {
0748: _start = --_index;
0749: _state = STATE_CHARACTERS;
0750: processStartTag();
// The END_ELEMENT event is generated by the next call (see top).
0751: _isEmpty = true;
0752: return _eventType = START_ELEMENT;
0753: } else {
0754: throw new XMLStreamException("'>' expected",
0755: _location);
0756: }
0757:
0758: // CLOSE_TAG:
0759: case STATE_CLOSE_TAGxREAD_ELEM_NAME:
0760: while (true) { // Element name can be read all at once.
0761:
0762: if (c < '@') { // Else avoid multiple checks.
0763: if (c == '>') {
0764: _qName = newSeq(_start, --_index - _start);
0765: _start = _index;
0766: _state = STATE_CHARACTERS;
0767: processEndTag();
0768: return _eventType = END_ELEMENT;
0769: } else if (c == ':') {
0770: _prefixSep = _index - 1;
0771: } else if (c <= ' ') {
0772: _qName = newSeq(_start, --_index - _start);
0773: _state = STATE_CLOSE_TAGxELEM_NAME_READ;
0774: break;
0775: }
0776: }
0777:
0778: if (_readIndex >= _readCount)
0779: reloadBuffer();
0780: c = _data[_index++] = _readBuffer[_readIndex++];
0781: }
0782: break;
0783:
0784: case STATE_CLOSE_TAGxELEM_NAME_READ:
0785: if (c == '>') {
0786: _start = --_index;
0787: _state = STATE_CHARACTERS;
0788: processEndTag();
0789: return _eventType = END_ELEMENT;
0790: } else if (c > ' ') {
0791: throw new XMLStreamException("'>' expected",
0792: _location);
0793: }
0794: break;
0795:
0796: default:
0797: throw new XMLStreamException(
0798: "State unknown: " + _state, _location);
0799: }
0800: }
0801: }
0802:
0803: // Defines parsing states (keep values close together to avoid lookup).
// These are the values taken by _state and dispatched on in next().
0804: private static final int STATE_CHARACTERS = 1; // Reading text (initial state).
0805:
0806: private static final int STATE_MARKUP = 2; // '<' read, kind of markup not yet known.
0807:
0808: private static final int STATE_COMMENT = 3; // Inside <!-- ... -->
0809:
0810: private static final int STATE_PI = 4; // Inside <? ... ?>
0811:
0812: private static final int STATE_CDATA = 5; // Inside <![CDATA[ ... ]]>
0813:
0814: private static final int STATE_OPEN_TAGxREAD_ELEM_NAME = 6;
0815:
0816: private static final int STATE_OPEN_TAGxELEM_NAME_READ = 7;
0817:
0818: private static final int STATE_OPEN_TAGxREAD_ATTR_NAME = 8;
0819:
0820: private static final int STATE_OPEN_TAGxATTR_NAME_READ = 9;
0821:
0822: private static final int STATE_OPEN_TAGxEQUAL_READ = 10;
0823:
0824: private static final int STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE = 11;
0825:
0826: private static final int STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE = 12;
0827:
0828: private static final int STATE_OPEN_TAGxEMPTY_TAG = 13; // '/' read inside a start tag, '>' expected.
0829:
0830: private static final int STATE_CLOSE_TAGxREAD_ELEM_NAME = 14;
0831:
0832: private static final int STATE_CLOSE_TAGxELEM_NAME_READ = 15;
0833:
0834: private static final int STATE_DTD = 16; // Inside <!DOCTYPE ... >
0835:
0836: private static final int STATE_DTD_INTERNAL = 17; // Inside the [ ... ] part of a DTD.
0837:
0838: /**
0839: * Reloads data buffer.
0840: *
0841: * @param detectEndOfStream indicates
0842: * @return <code>true</code> if the buffer has been reloaded;
0843: * <code>false</code> if the end of stream has being reached
0844: * and the event type (CHARACTERS or END_DOCUMENT) has been set.
0845: */
0846: private void reloadBuffer() throws XMLStreamException {
0847: _location._column += _readIndex;
0848: _location._charactersRead += _readIndex;
0849: _readIndex = 0;
0850: try {
0851: _readCount = _reader.read(_readBuffer, 0,
0852: _readBuffer.length);
0853: if ((_readCount <= 0)
0854: && ((_depth != 0) || (_state != STATE_CHARACTERS)))
0855: throw new XMLStreamException(
0856: "Unexpected end of document", _location);
0857: } catch (IOException e) {
0858: throw new XMLStreamException(e);
0859: }
0860: while ((_index + _readCount) >= _data.length) { // Potential overflow.
0861: increaseDataBuffer();
0862: }
0863: }
0864:
0865: /**
0866: * Detects end of stream.
0867: *
0868: * @return <code>true</code> if end of stream has being reached
0869: * and the event type (CHARACTERS or END_DOCUMENT) has been set;
0870: * <code>false</code> otherwise.
0871: */
0872: private boolean isEndOfStream() throws XMLStreamException {
0873: if (_readIndex >= _readCount)
0874: reloadBuffer();
0875: if (_readCount <= 0) {
0876: // _state == STATE_CHARACTERS (otherwise reloadBuffer() exception)
0877: if (_eventType == END_DOCUMENT)
0878: throw new XMLStreamException(
0879: "End document has already been reached");
0880: int length = _index - _start;
0881: if (length > 0) { // Flushes trailing characters.
0882: if (_charactersPending) {
0883: _text.setArray(_data, _text.offset(), _text
0884: .length()
0885: + length); // Coalescing.
0886: } else {
0887: _text = newSeq(_start, length);
0888: }
0889: _start = _index;
0890: _eventType = CHARACTERS;
0891: } else {
0892: _eventType = END_DOCUMENT;
0893: }
0894: return true;
0895: }
0896: return false;
0897: }
0898:
0899: /**
0900: * Handles end of line as per XML Spec. 2.11
0901: *
0902: * @param c the potential end of line character.
0903: * @return the replacement character for end of line.
0904: */
0905: private char handleEndOfLine(char c) throws XMLStreamException {
0906: if (c == 0xD) { // Replaces #xD with #xA
0907: // Unless next char is #xA, then skip,
0908: // #xD#xA will be replaced by #xA
0909: if (_readIndex >= _readCount)
0910: reloadBuffer();
0911: if ((_readIndex < _readCount)
0912: && (_readBuffer[_readIndex] == 0xA))
0913: _readIndex++; // Skips 0xD
0914: c = (char) 0xA;
0915: }
0916: if (c == 0xA) {
0917: _location._line++;
0918: _location._column = -_readIndex; // column = 0
0919: } else if (c == 0x0) {
0920: throw new XMLStreamException(
0921: "Illegal XML character U+0000", _location);
0922: }
0923: return c;
0924: }
0925:
0926: /**
0927: * Replaces an entity if the current state allows it.
0928: *
0929: * @return the next character after the text replacement or '&' if no
0930: * replacement took place.
0931: */
0932: private char replaceEntity() throws XMLStreamException {
0933: if ((_state == STATE_COMMENT) || (_state == STATE_PI)
0934: || (_state == STATE_CDATA))
0935: return '&'; // (&2.4)
0936:
0937: int start = _index; // Index of first replacement character.
0938: _data[_index++] = '&';
0939: while (true) {
0940: if (_readIndex >= _readCount)
0941: reloadBuffer();
0942: char c = _data[_index++] = _readBuffer[_readIndex++];
0943: if (c == ';')
0944: break;
0945: if (c <= ' ')
0946: throw new XMLStreamException("';' expected", _location);
0947: }
0948: // Ensures that the replacement string holds in the data buffer.
0949: while (start + _entities.getMaxLength() >= _data.length)
0950: increaseDataBuffer();
0951:
0952: // Replaces the entity.
0953: int length = _entities.replaceEntity(_data, start, _index
0954: - start);
0955:
0956: // Returns the next character after entity unless ampersand.
0957: _index = start + length;
0958:
0959: // Local character reading block.
0960: if (_readIndex >= _readCount)
0961: reloadBuffer();
0962: char c = _readBuffer[_readIndex++];
0963: return (c == '&') ? (c = replaceEntity()) : c;
0964: }
0965:
0966: /**
0967: * Processes the attribute just read.
0968: */
0969: private void processAttribute() throws XMLStreamException {
0970: if (_attrPrefixSep < 0) { // No prefix.
0971: if (isXMLNS(_attrQName)) { // Sets default namespace.
0972: _namespaces.setPrefix(_namespaces._defaultNsPrefix,
0973: _attrValue);
0974: } else {
0975: _attributes.addAttribute(_attrQName, null, _attrQName,
0976: _attrValue);
0977: }
0978: } else { // Prefix.
0979: final int offset = _attrQName.offset();
0980: final int length = _attrQName.length();
0981:
0982: CharArray prefix = newSeq(offset, _attrPrefixSep - offset);
0983:
0984: CharArray localName = newSeq(_attrPrefixSep + 1, offset
0985: + length - _attrPrefixSep - 1);
0986:
0987: if (isXMLNS(prefix)) { // Namespace association.
0988: _namespaces.setPrefix(localName, _attrValue);
0989: } else {
0990: _attributes.addAttribute(localName, prefix, _attrQName,
0991: _attrValue);
0992: }
0993: }
0994: }
0995:
0996: private static boolean isXMLNS(CharArray chars) {
0997: return (chars.length() == 5) && (chars.charAt(0) == 'x')
0998: && (chars.charAt(1) == 'm') && (chars.charAt(2) == 'l')
0999: && (chars.charAt(3) == 'n') && (chars.charAt(4) == 's');
1000: }
1001:
1002: private void processEndTag() throws XMLStreamException {
1003: if (!_qName.equals(_elemStack[_depth]))
1004: throw new XMLStreamException("Unexpected end tag for "
1005: + _qName, _location);
1006: }
1007:
1008: private void processStartTag() throws XMLStreamException {
1009: if (++_depth >= _elemStack.length) {
1010: increaseStack();
1011: }
1012: _elemStack[_depth] = _qName;
1013: _namespaces.push();
1014: }
1015:
1016: // Implements Reusable.
1017: public void reset() {
1018: // Resets all members (alphabetically ordered).
1019: _attributes.reset();
1020: _attrPrefixSep = 0;
1021: _attrQName = null;
1022: _attrValue = null;
1023: _attrQName = null;
1024: _charactersPending = false;
1025: _encoding = null;
1026: _entities.reset();
1027: _eventType = START_DOCUMENT;
1028: _index = 0;
1029: _isEmpty = false;
1030: _location.reset();
1031: _namespaces.reset();
1032: _objectFactory = null;
1033: _prolog = null;
1034: _readCount = 0;
1035: _reader = null;
1036: _depth = 0;
1037: _readIndex = 0;
1038: _seqsIndex = 0;
1039: _start = 0;
1040: _startOffset = 0;
1041: _state = STATE_CHARACTERS;
1042: _utf8StreamReader.reset();
1043: }
1044:
1045: // Returns a new character sequence from the pool.
1046: private CharArray newSeq(int offset, int length) {
1047: CharArray seq = (_seqsIndex < _seqsCapacity) ? _seqs[_seqsIndex++]
1048: : newSeq2();
1049: return seq.setArray(_data, offset, length);
1050: }
1051:
1052: private CharArray newSeq2() {
1053: MemoryArea.getMemoryArea(this ).executeInArea(_createSeqLogic);
1054: return _seqs[_seqsIndex++];
1055: }
1056:
1057: private final Runnable _createSeqLogic = new Runnable() {
1058: public void run() {
1059: if (_seqsCapacity >= _seqs.length) { // Resizes.
1060: CharArray[] tmp = new CharArray[_seqs.length * 2];
1061: System.arraycopy(_seqs, 0, tmp, 0, _seqs.length);
1062: _seqs = tmp;
1063: }
1064: CharArray seq = new CharArray();
1065: _seqs[_seqsCapacity++] = seq;
1066: }
1067: };
1068:
// Pool of character sequences handed out by newSeq(); enlarged (doubled)
// by _createSeqLogic when full.
private CharArray[] _seqs = new CharArray[256];

// Index of the next pooled sequence to hand out; reset() sets it back to 0.
private int _seqsIndex;

// Number of pool slots actually holding an allocated CharArray.
private int _seqsCapacity;
1074:
1075: // Increases internal data buffer capacity.
1076: private void increaseDataBuffer() {
1077: // Note: The character data at any nesting level is discarded
1078: // only when moving to outer nesting level (due to coalescing).
1079: // This accumulation may cause resize of the data buffer if
1080: // numerous elements at the same nesting level are separated by
1081: // spaces or indentation.
1082: MemoryArea.getMemoryArea(this ).executeInArea(new Runnable() {
1083: public void run() {
1084: char[] tmp = new char[_data.length * 2];
1085: javolution.context.LogContext.info(new CharArray(
1086: "XMLStreamReaderImpl: Data buffer increased to "
1087: + tmp.length));
1088: System.arraycopy(_data, 0, tmp, 0, _data.length);
1089: _data = tmp;
1090: }
1091: });
1092: }
1093:
1094: // Increases statck.
1095: private void increaseStack() {
1096: MemoryArea.getMemoryArea(this ).executeInArea(new Runnable() {
1097: public void run() {
1098: CharArray[] tmp = new CharArray[_elemStack.length * 2];
1099: javolution.context.LogContext.info(new CharArray(
1100: "XMLStreamReaderImpl: CharArray stack increased to "
1101: + tmp.length));
1102: System.arraycopy(_elemStack, 0, tmp, 0,
1103: _elemStack.length);
1104: _elemStack = tmp;
1105: }
1106: });
1107: }
1108:
1109: /**
1110: * This inner class represents the parser location.
1111: */
1112: private final class LocationImpl implements Location, Reusable {
1113:
1114: int _column;
1115:
1116: int _line;
1117:
1118: int _charactersRead;
1119:
1120: public int getLineNumber() {
1121: return _line + 1;
1122: }
1123:
1124: public int getColumnNumber() {
1125: return _column + _readIndex;
1126: }
1127:
1128: public int getCharacterOffset() {
1129: return _charactersRead + _readIndex;
1130: }
1131:
1132: public String getPublicId() {
1133: return null; // Not available.
1134: }
1135:
1136: public String getSystemId() {
1137: return null; // Not available.
1138: }
1139:
1140: public String toString() {
1141: return "Line " + getLineNumber() + ", Column "
1142: + getColumnNumber();
1143: }
1144:
1145: public void reset() {
1146: _line = 0;
1147: _column = 0;
1148: _charactersRead = 0;
1149: }
1150: }
1151:
1152: //////////////////////////////////////////
1153: // Implements XMLStreamReader Interface //
1154: //////////////////////////////////////////
1155:
1156: // Implements XMLStreamReader Interface.
1157: public void require(int type, CharSequence namespaceURI,
1158: CharSequence localName) throws XMLStreamException {
1159: if (_eventType != type)
1160: throw new XMLStreamException("Expected event: "
1161: + NAMES_OF_EVENTS[type] + ", found event: "
1162: + NAMES_OF_EVENTS[_eventType]);
1163: if ((namespaceURI != null)
1164: && !getNamespaceURI().equals(namespaceURI))
1165: throw new XMLStreamException("Expected namespace URI: "
1166: + namespaceURI + ", found: " + getNamespaceURI());
1167: if ((localName != null) && !getLocalName().equals(localName))
1168: throw new XMLStreamException("Expected local name: "
1169: + localName + ", found: " + getLocalName());
1170: }
1171:
1172: // Implements XMLStreamReader Interface.
1173: public CharArray getElementText() throws XMLStreamException {
1174: // Derived from interface specification code.
1175: if (getEventType() != XMLStreamConstants.START_ELEMENT) {
1176: throw new XMLStreamException(
1177: "Parser must be on START_ELEMENT to read next text",
1178: getLocation());
1179: }
1180: CharArray text = null;
1181: int eventType = next();
1182: while (eventType != XMLStreamConstants.END_ELEMENT) {
1183: if (eventType == XMLStreamConstants.CHARACTERS) {
1184: if (text == null) {
1185: text = getText();
1186: } else { // Merge (adjacent text, comments and PI are not kept).
1187: text.setArray(_data, text.offset(), text.length()
1188: + getText().length());
1189: }
1190: } else if (eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
1191: || eventType == XMLStreamConstants.COMMENT) {
1192: // Skips (not kept).
1193: } else if (eventType == XMLStreamConstants.END_DOCUMENT) {
1194: throw new XMLStreamException(
1195: "Unexpected end of document when reading element text content",
1196: getLocation());
1197: } else if (eventType == XMLStreamConstants.START_ELEMENT) {
1198: throw new XMLStreamException(
1199: "Element text content may not contain START_ELEMENT",
1200: getLocation());
1201: } else {
1202: throw new XMLStreamException("Unexpected event type "
1203: + NAMES_OF_EVENTS[eventType], getLocation());
1204: }
1205: eventType = next();
1206: }
1207: return (text != null) ? text : newSeq(0, 0);
1208: }
1209:
1210: // Implements XMLStreamReader Interface.
1211: public Object getProperty(String name)
1212: throws IllegalArgumentException {
1213: if (name.equals(XMLInputFactory.IS_COALESCING)) {
1214: return new Boolean(true);
1215: } else if (name.equals(XMLInputFactory.ENTITIES)) {
1216: return _entities.getEntitiesMapping();
1217: } else {
1218: throw new IllegalArgumentException("Property: " + name
1219: + " not supported");
1220: }
1221: }
1222:
1223: // Implements XMLStreamReader Interface.
1224: public void close() throws XMLStreamException {
1225: if (_objectFactory != null) {
1226: _objectFactory.recycle(this ); // Automatic reset.
1227: } else {
1228: reset();
1229: }
1230: }
1231:
1232: public int getAttributeCount() {
1233: if (_eventType != XMLStreamConstants.START_ELEMENT)
1234: throw illegalState("Not a start element");
1235: return _attributes.getLength();
1236: }
1237:
1238: public CharArray getAttributeLocalName(int index) {
1239: if (_eventType != XMLStreamConstants.START_ELEMENT)
1240: throw illegalState("Not a start element");
1241: return _attributes.getLocalName(index);
1242: }
1243:
1244: public CharArray getAttributeNamespace(int index) {
1245: if (_eventType != XMLStreamConstants.START_ELEMENT)
1246: throw illegalState("Not a start element");
1247: CharArray prefix = _attributes.getPrefix(index);
1248: return _namespaces.getNamespaceURINullAllowed(prefix);
1249: }
1250:
1251: public CharArray getAttributePrefix(int index) {
1252: if (_eventType != XMLStreamConstants.START_ELEMENT)
1253: throw illegalState("Not a start element");
1254: return _attributes.getPrefix(index);
1255: }
1256:
1257: public CharArray getAttributeType(int index) {
1258: if (_eventType != XMLStreamConstants.START_ELEMENT)
1259: throw illegalState("Not a start element");
1260: return _attributes.getType(index);
1261: }
1262:
1263: public CharArray getAttributeValue(CharSequence uri,
1264: CharSequence localName) {
1265: if (_eventType != XMLStreamConstants.START_ELEMENT)
1266: throw illegalState("Not a start element");
1267: return (uri == null) ? _attributes.getValue(localName)
1268: : _attributes.getValue(uri, localName);
1269: }
1270:
1271: public CharArray getAttributeValue(int index) {
1272: if (_eventType != XMLStreamConstants.START_ELEMENT)
1273: throw illegalState("Not a start element");
1274: return _attributes.getValue(index);
1275: }
1276:
1277: public CharArray getCharacterEncodingScheme() {
1278: return readPrologAttribute(ENCODING);
1279: }
1280:
1281: private static final CharArray ENCODING = new CharArray("encoding");
1282:
1283: public String getEncoding() {
1284: return _encoding;
1285: }
1286:
1287: public int getEventType() {
1288: return _eventType;
1289: }
1290:
1291: public CharArray getLocalName() {
1292: if ((_eventType != XMLStreamConstants.START_ELEMENT)
1293: && (_eventType != XMLStreamConstants.END_ELEMENT))
1294: throw illegalState("Not a start or end element");
1295: if (_prefixSep < 0)
1296: return _qName;
1297: CharArray localName = newSeq(_prefixSep + 1, _qName.offset()
1298: + _qName.length() - _prefixSep - 1);
1299: return localName;
1300: }
1301:
1302: public Location getLocation() {
1303: return _location;
1304: }
1305:
1306: public int getNamespaceCount() {
1307: if ((_eventType != XMLStreamConstants.START_ELEMENT)
1308: && (_eventType != XMLStreamConstants.END_ELEMENT))
1309: throw illegalState("Not a start or end element");
1310: return _namespaces._namespacesCount[_depth];
1311: }
1312:
1313: public CharArray getNamespacePrefix(int index) {
1314: if ((_eventType != XMLStreamConstants.START_ELEMENT)
1315: && (_eventType != XMLStreamConstants.END_ELEMENT))
1316: throw illegalState("Not a start or end element");
1317: return _namespaces._prefixes[index];
1318: }
1319:
1320: public CharArray getNamespaceURI(CharSequence prefix) {
1321: if ((_eventType != XMLStreamConstants.START_ELEMENT)
1322: && (_eventType != XMLStreamConstants.END_ELEMENT))
1323: throw illegalState("Not a start or end element");
1324: return _namespaces.getNamespaceURI(prefix);
1325: }
1326:
1327: public CharArray getNamespaceURI(int index) {
1328: if ((_eventType != XMLStreamConstants.START_ELEMENT)
1329: && (_eventType != XMLStreamConstants.END_ELEMENT))
1330: throw illegalState("Not a start or end element");
1331: return _namespaces._namespaces[index];
1332: }
1333:
1334: public NamespaceContext getNamespaceContext() {
1335: return _namespaces;
1336: }
1337:
1338: public CharArray getNamespaceURI() {
1339: return _namespaces.getNamespaceURINullAllowed(getPrefix());
1340: }
1341:
1342: public CharArray getPrefix() {
1343: if ((_eventType != XMLStreamConstants.START_ELEMENT)
1344: && (_eventType != XMLStreamConstants.END_ELEMENT))
1345: throw illegalState("Not a start or end element");
1346: if (_prefixSep < 0)
1347: return null;
1348: int offset = _qName.offset();
1349: CharArray prefix = newSeq(offset, _prefixSep - offset);
1350: return prefix;
1351: }
1352:
1353: public CharArray getPIData() {
1354: if (_eventType != XMLStreamConstants.PROCESSING_INSTRUCTION)
1355: throw illegalState("Not a processing instruction");
1356: int offset = _text.offsetOf(' ') + 1;
1357: CharArray piData = newSeq(offset, _text.length() - offset);
1358: return piData;
1359: }
1360:
1361: public CharArray getPITarget() {
1362: if (_eventType != XMLStreamConstants.PROCESSING_INSTRUCTION)
1363: throw illegalState("Not a processing instruction");
1364: CharArray piTarget = newSeq(_text.offset(), _text.offsetOf(' '));
1365: return piTarget;
1366: }
1367:
1368: public CharArray getText() {
1369: if ((_eventType != XMLStreamConstants.CHARACTERS)
1370: && (_eventType != XMLStreamConstants.COMMENT)
1371: && (_eventType != XMLStreamConstants.DTD))
1372: throw illegalState("Not a text event");
1373: return _text;
1374: }
1375:
1376: public char[] getTextCharacters() {
1377: return getText().array();
1378: }
1379:
1380: public int getTextCharacters(int sourceStart, char[] target,
1381: int targetStart, int length) throws XMLStreamException {
1382: CharArray text = getText();
1383: int copyLength = Math.min(length, text.length());
1384: System.arraycopy(text.array(), sourceStart + text.offset(),
1385: target, targetStart, copyLength);
1386: return copyLength;
1387: }
1388:
1389: public int getTextLength() {
1390: return getText().length();
1391: }
1392:
1393: public int getTextStart() {
1394: return getText().offset();
1395: }
1396:
1397: public CharArray getVersion() {
1398: return readPrologAttribute(VERSION);
1399: }
1400:
1401: private static final CharArray VERSION = new CharArray("version");
1402:
1403: public boolean isStandalone() {
1404: CharArray standalone = readPrologAttribute(STANDALONE);
1405: return (standalone != null) ? standalone.equals("no") : true;
1406: }
1407:
1408: public boolean standaloneSet() {
1409: return readPrologAttribute(STANDALONE) != null;
1410: }
1411:
1412: private static final CharArray STANDALONE = new CharArray(
1413: "standalone");
1414:
1415: public boolean hasName() {
1416: return (_eventType == XMLStreamConstants.START_ELEMENT)
1417: || (_eventType == XMLStreamConstants.END_ELEMENT);
1418: }
1419:
1420: public boolean hasNext() throws XMLStreamException {
1421: return _eventType != XMLStreamConstants.END_DOCUMENT;
1422: }
1423:
1424: public boolean hasText() {
1425: return ((_eventType == XMLStreamConstants.CHARACTERS)
1426: || (_eventType == XMLStreamConstants.COMMENT) || (_eventType == XMLStreamConstants.DTD))
1427: && (_text.length() > 0);
1428: }
1429:
1430: public boolean isAttributeSpecified(int index) {
1431: if (_eventType != XMLStreamConstants.START_ELEMENT)
1432: throw new IllegalStateException("Not a start element");
1433: return _attributes.getValue(index) != null;
1434: }
1435:
1436: public boolean isCharacters() {
1437: return _eventType == XMLStreamConstants.CHARACTERS;
1438: }
1439:
1440: public boolean isEndElement() {
1441: return _eventType == XMLStreamConstants.END_ELEMENT;
1442: }
1443:
1444: public boolean isStartElement() {
1445: return _eventType == XMLStreamConstants.START_ELEMENT;
1446: }
1447:
1448: public boolean isWhiteSpace() {
1449: if (isCharacters()) {
1450: char[] chars = _text.array();
1451: for (int i = _text.offset(), end = _text.offset()
1452: + _text.length(); i < end;) {
1453: if (!isWhiteSpace(chars[i++]))
1454: return false;
1455: }
1456: return true;
1457: }
1458: return false;
1459: }
1460:
1461: // Whitespaces according to XML 1.1 Specification.
1462: private static boolean isWhiteSpace(char c) {
1463: return (c == 0x20) || (c == 0x9) || (c == 0xD) || (c == 0xA);
1464: }
1465:
1466: public int nextTag() throws XMLStreamException {
1467: int eventType = next();
1468: while (eventType == XMLStreamConstants.COMMENT
1469: || eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
1470: || eventType == XMLStreamConstants.DTD
1471: || (eventType == XMLStreamConstants.CHARACTERS && isWhiteSpace())) {
1472: eventType = next();
1473: }
1474: if (eventType != XMLStreamConstants.START_ELEMENT
1475: && eventType != XMLStreamConstants.END_ELEMENT)
1476: throw new XMLStreamException("Tag expected (but found "
1477: + NAMES_OF_EVENTS[_eventType] + ")");
1478: return eventType;
1479: }
1480:
1481: private IllegalStateException illegalState(String msg) {
1482: return new IllegalStateException(msg + " ("
1483: + NAMES_OF_EVENTS[_eventType] + ")");
1484: }
1485:
1486: private String detectEncoding(InputStream input)
1487: throws XMLStreamException {
1488: // Autodetect encoding (see http://en.wikipedia.org/wiki/UTF-16)
1489: int byte0;
1490: try {
1491: byte0 = input.read();
1492: } catch (IOException e) {
1493: throw new XMLStreamException(e);
1494: }
1495: if (byte0 == -1)
1496: throw new XMLStreamException("Premature End-Of-File");
1497: if (byte0 == '<') { // UTF-8 or compatible encoding.
1498: _readBuffer[_startOffset++] = '<';
1499: return "UTF-8";
1500: } else {
1501: int byte1;
1502: try {
1503: byte1 = input.read();
1504: } catch (IOException e) {
1505: throw new XMLStreamException(e);
1506: }
1507: if (byte1 == -1)
1508: throw new XMLStreamException("Premature End-Of-File");
1509: if ((byte0 == 0) && (byte1 == '<')) { // UTF-16 BIG ENDIAN
1510: _readBuffer[_startOffset++] = '<';
1511: return "UTF-16BE";
1512: } else if ((byte0 == '<') && (byte1 == 0)) { // UTF-16 LITTLE ENDIAN
1513: _readBuffer[_startOffset++] = '<';
1514: return "UTF-16LE";
1515: } else if ((byte0 == 0xFF) && (byte1 == 0xFE)) { // BOM for UTF-16 LITTLE ENDIAN
1516: return "UTF-16";
1517: } else if ((byte0 == 0xFE) && (byte1 == 0xFF)) { // BOM for UTF-16 BIG ENDIAN
1518: return "UTF-16";
1519: } else { // Encoding unknown (or no prolog) assumes UTF-8
1520: _readBuffer[_startOffset++] = (char) byte0;
1521: _readBuffer[_startOffset++] = (char) byte1;
1522: return "UTF-8";
1523: }
1524: }
1525: }
1526:
1527: private final CharArray readPrologAttribute(CharSequence name) {
1528: if (_prolog == null)
1529: return null;
1530: final int READ_EQUAL = 0;
1531: final int READ_QUOTE = 1;
1532: final int VALUE_SIMPLE_QUOTE = 2;
1533: final int VALUE_DOUBLE_QUOTE = 3;
1534:
1535: int i = _prolog.offsetOf(name);
1536: if (i >= 0) {
1537: int maxIndex = _prolog.offset() + _prolog.length();
1538: i += name.length();
1539: int state = READ_EQUAL;
1540: int valueOffset = 0;
1541: while (i < maxIndex) {
1542: char c = _prolog.array()[i++];
1543: switch (state) {
1544: case READ_EQUAL:
1545: if (c == '=') {
1546: state = READ_QUOTE;
1547: }
1548: break;
1549: case READ_QUOTE:
1550: if (c == '"') {
1551: state = VALUE_DOUBLE_QUOTE;
1552: valueOffset = i;
1553: } else if (c == '\'') {
1554: state = VALUE_SIMPLE_QUOTE;
1555: valueOffset = i;
1556: }
1557: break;
1558: case VALUE_SIMPLE_QUOTE:
1559: if (c == '\'')
1560: return newSeq(valueOffset, i - valueOffset - 1);
1561: break;
1562: case VALUE_DOUBLE_QUOTE:
1563: if (c == '"')
1564: return newSeq(valueOffset, i - valueOffset - 1);
1565: break;
1566: }
1567: }
1568: }
1569: return null;
1570: }
1571: }
|