0001: /*
0002: * Fast Infoset ver. 0.1 software ("Software")
0003: *
0004: * Copyright, 2004-2005 Sun Microsystems, Inc. All Rights Reserved.
0005: *
0006: * Software is licensed under the Apache License, Version 2.0 (the "License");
0007: * you may not use this file except in compliance with the License. You may
0008: * obtain a copy of the License at:
0009: *
0010: * http://www.apache.org/licenses/LICENSE-2.0
0011: *
0012: * Unless required by applicable law or agreed to in writing, software
0013: * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
0014: * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
0015: * License for the specific language governing permissions and limitations.
0016: *
0017: * Sun supports and benefits from the global community of open source
0018: * developers, and thanks the community for its important contributions and
0019: * open standards-based technology, which Sun has adopted into many of its
0020: * products.
0021: *
0022: * Please note that portions of Software may be provided with notices and
0023: * open source licenses from such communities and third parties that govern the
0024: * use of those portions, and any licenses granted hereunder do not alter any
0025: * rights and obligations you may have under such open source licenses,
0026: * however, the disclaimer of warranty and limitation of liability provisions
0027: * in this License will apply to all Software in this distribution.
0028: *
0029: * You acknowledge that the Software is not designed, licensed or intended
0030: * for use in the design, construction, operation or maintenance of any nuclear
0031: * facility.
0032: *
0033: * Apache License
0034: * Version 2.0, January 2004
0035: * http://www.apache.org/licenses/
0036: *
0037: */
0038: package com.sun.xml.fastinfoset;
0039:
0040: import com.sun.xml.fastinfoset.alphabet.BuiltInRestrictedAlphabets;
0041: import com.sun.xml.fastinfoset.org.apache.xerces.util.XMLChar;
0042: import com.sun.xml.fastinfoset.util.CharArray;
0043: import com.sun.xml.fastinfoset.util.CharArrayArray;
0044: import com.sun.xml.fastinfoset.util.CharArrayString;
0045: import com.sun.xml.fastinfoset.util.ContiguousCharArrayArray;
0046: import com.sun.xml.fastinfoset.util.DuplicateAttributeVerifier;
0047: import com.sun.xml.fastinfoset.util.PrefixArray;
0048: import com.sun.xml.fastinfoset.util.QualifiedNameArray;
0049: import com.sun.xml.fastinfoset.util.StringArray;
0050: import com.sun.xml.fastinfoset.vocab.ParserVocabulary;
0051: import java.io.EOFException;
0052: import java.io.IOException;
0053: import java.io.InputStream;
0054: import java.util.ArrayList;
0055: import java.util.HashMap;
0056: import java.util.List;
0057: import java.util.Map;
0058: import org.jvnet.fastinfoset.FastInfosetException;
0059: import org.jvnet.fastinfoset.FastInfosetParser;
0060:
0061: /**
 * Abstract decoder for developing concrete decoders.
0063: *
0064: * Concrete implementations extending Decoder will utilize methods on Decoder
0065: * to decode XML infoset according to the Fast Infoset standard. It is the
0066: * responsibility of the concrete implementation to ensure that methods are
0067: * invoked in the correct order to correctly decode a valid fast infoset
0068: * document.
0069: *
0070: * <p>
 * This class implements {@link FastInfosetParser}. It does not itself extend
 * org.xml.sax.helpers.DefaultHandler; a concrete SAX implementation that is
 * to be used with javax.xml.parsers.SAXParser and the parse methods that take
 * a DefaultHandler as a parameter has to provide that integration itself.
0074: *
0075: * <p>
0076: * Buffering of octets that are read from an {@link java.io.InputStream} is
0077: * supported in a similar manner to a {@link java.io.BufferedInputStream}.
0078: * Combining buffering with decoding enables better performance.
0079: *
0080: * <p>
0081: * More than one fast infoset document may be decoded from the
0082: * {@link java.io.InputStream}.
0083: */
0084: public abstract class Decoder implements FastInfosetParser {
0085:
0086: protected static final char[] XML_NAMESPACE_NAME_CHARS = EncodingConstants.XML_NAMESPACE_NAME
0087: .toCharArray();
0088:
0089: protected static final char[] XMLNS_NAMESPACE_PREFIX_CHARS = EncodingConstants.XMLNS_NAMESPACE_PREFIX
0090: .toCharArray();
0091:
0092: protected static final char[] XMLNS_NAMESPACE_NAME_CHARS = EncodingConstants.XMLNS_NAMESPACE_NAME
0093: .toCharArray();
0094:
0095: /**
0096: * String interning system property.
0097: */
0098: public static final String STRING_INTERNING_SYSTEM_PROPERTY = "com.sun.xml.fastinfoset.parser.string-interning";
0099:
0100: /**
0101: * Internal buffer size interning system property.
0102: */
0103: public static final String BUFFER_SIZE_SYSTEM_PROPERTY = "com.sun.xml.fastinfoset.parser.buffer-size";
0104:
0105: private static boolean _stringInterningSystemDefault = false;
0106:
0107: private static int _bufferSizeSystemDefault = 1024;
0108:
0109: static {
0110: String p = System.getProperty(STRING_INTERNING_SYSTEM_PROPERTY,
0111: Boolean.toString(_stringInterningSystemDefault));
0112: _stringInterningSystemDefault = Boolean.valueOf(p)
0113: .booleanValue();
0114:
0115: p = System.getProperty(BUFFER_SIZE_SYSTEM_PROPERTY, Integer
0116: .toString(_bufferSizeSystemDefault));
0117: try {
0118: int i = Integer.valueOf(p).intValue();
0119: if (i > 0) {
0120: _bufferSizeSystemDefault = i;
0121: }
0122: } catch (NumberFormatException e) {
0123: }
0124: }
0125:
0126: /**
0127: * True if string interning is performed by the decoder.
0128: */
0129: private boolean _stringInterning = _stringInterningSystemDefault;
0130:
0131: /**
0132: * The input stream from which the fast infoset document is being read.
0133: */
0134: private InputStream _s;
0135:
0136: /**
0137: * The map of URIs to referenced vocabularies.
0138: */
0139: private Map _externalVocabularies;
0140:
0141: /**
0142: * True if can parse fragments.
0143: */
0144: protected boolean _parseFragments;
0145:
0146: /**
0147: * True if needs to close underlying input stream.
0148: */
0149: protected boolean _needForceStreamClose;
0150:
0151: /**
0152: * True if the vocabulary is internally created by decoder.
0153: */
0154: private boolean _vIsInternal;
0155:
0156: /**
0157: * The list of Notation Information Items that are part of the
0158: * Document Information Item.
0159: */
0160: protected List _notations;
0161:
0162: /**
0163: * The list of Unparsed Entity Information Items that are part of the
0164: * Document Information Item.
0165: */
0166: protected List _unparsedEntities;
0167:
0168: /**
0169: * The map of URIs to registered encoding algorithms.
0170: */
0171: protected Map _registeredEncodingAlgorithms = new HashMap();
0172:
0173: /**
0174: * The vocabulary used for decoding.
0175: */
0176: protected ParserVocabulary _v;
0177:
0178: /**
0179: * The prefix table of the vocabulary.
0180: */
0181: protected PrefixArray _prefixTable;
0182:
0183: /**
0184: * The element name table of the vocabulary.
0185: */
0186: protected QualifiedNameArray _elementNameTable;
0187:
0188: /**
0189: * The attribute name table of the vocabulary.
0190: */
0191: protected QualifiedNameArray _attributeNameTable;
0192:
0193: /**
0194: * The character content chunk table of the vocabulary.
0195: */
0196: protected ContiguousCharArrayArray _characterContentChunkTable;
0197:
0198: /**
0199: * The attribute value table of the vocabulary.
0200: */
0201: protected StringArray _attributeValueTable;
0202:
0203: /**
0204: * The current octet that is being read
0205: */
0206: protected int _b;
0207:
0208: /**
0209: * True if an information item is terminated.
0210: */
0211: protected boolean _terminate;
0212:
0213: /**
0214: * True if two information item are terminated in direct sequence.
0215: */
0216: protected boolean _doubleTerminate;
0217:
0218: /**
0219: * True if an entry is required to be added to a table
0220: */
0221: protected boolean _addToTable;
0222:
0223: /**
0224: * The vocabulary table index to an indexed non identifying string.
0225: */
0226: protected int _integer;
0227:
0228: /**
0229: * The vocabulary table index of identifying string or the identifier of
0230: * an encoding algorithm or restricted alphabet.
0231: */
0232: protected int _identifier;
0233:
0234: /**
0235: * The size of the internal buffer.
0236: */
0237: protected int _bufferSize = _bufferSizeSystemDefault;
0238:
0239: /**
0240: * The internal buffer used for decoding.
0241: */
0242: protected byte[] _octetBuffer = new byte[_bufferSizeSystemDefault];
0243:
0244: /**
0245: * A mark into the internal buffer used for decoding encoded algorithm
0246: * or restricted alphabet data.
0247: */
0248: protected int _octetBufferStart;
0249:
0250: /**
0251: * The offset into the buffer to read the next byte.
0252: */
0253: protected int _octetBufferOffset;
0254:
0255: /**
0256: * The end of the buffer.
0257: */
0258: protected int _octetBufferEnd;
0259:
0260: /**
0261: * The length of some octets in the buffer that are to be read.
0262: */
0263: protected int _octetBufferLength;
0264:
0265: /**
0266: * The internal buffer of characters.
0267: */
0268: protected char[] _charBuffer = new char[512];
0269:
0270: /**
0271: * The length of characters in the buffer of characters.
0272: */
0273: protected int _charBufferLength;
0274:
0275: /**
0276: * Helper class that checks for duplicate attribute information items.
0277: */
0278: protected DuplicateAttributeVerifier _duplicateAttributeVerifier = new DuplicateAttributeVerifier();
0279:
0280: /**
0281: * Default constructor for the Decoder.
0282: */
0283: protected Decoder() {
0284: _v = new ParserVocabulary();
0285: _prefixTable = _v.prefix;
0286: _elementNameTable = _v.elementName;
0287: _attributeNameTable = _v.attributeName;
0288: _characterContentChunkTable = _v.characterContentChunk;
0289: _attributeValueTable = _v.attributeValue;
0290: _vIsInternal = true;
0291: }
0292:
0293: // FastInfosetParser interface
0294:
0295: /**
0296: * {@inheritDoc}
0297: */
0298: public void setStringInterning(boolean stringInterning) {
0299: _stringInterning = stringInterning;
0300: }
0301:
0302: /**
0303: * {@inheritDoc}
0304: */
0305: public boolean getStringInterning() {
0306: return _stringInterning;
0307: }
0308:
0309: /**
0310: * {@inheritDoc}
0311: */
0312: public void setBufferSize(int bufferSize) {
0313: if (_bufferSize > _octetBuffer.length) {
0314: _bufferSize = bufferSize;
0315: }
0316: }
0317:
0318: /**
0319: * {@inheritDoc}
0320: */
0321: public int getBufferSize() {
0322: return _bufferSize;
0323: }
0324:
0325: /**
0326: * {@inheritDoc}
0327: */
0328: public void setRegisteredEncodingAlgorithms(Map algorithms) {
0329: _registeredEncodingAlgorithms = algorithms;
0330: if (_registeredEncodingAlgorithms == null) {
0331: _registeredEncodingAlgorithms = new HashMap();
0332: }
0333: }
0334:
0335: /**
0336: * {@inheritDoc}
0337: */
0338: public Map getRegisteredEncodingAlgorithms() {
0339: return _registeredEncodingAlgorithms;
0340: }
0341:
0342: /**
0343: * {@inheritDoc}
0344: */
0345: public void setExternalVocabularies(Map referencedVocabualries) {
0346: if (referencedVocabualries != null) {
0347: // Clone the input map
0348: _externalVocabularies = new HashMap();
0349: _externalVocabularies.putAll(referencedVocabualries);
0350: } else {
0351: _externalVocabularies = null;
0352: }
0353: }
0354:
0355: /**
0356: * {@inheritDoc}
0357: */
0358: public Map getExternalVocabularies() {
0359: return _externalVocabularies;
0360: }
0361:
0362: /**
0363: * {@inheritDoc}
0364: */
0365: public void setParseFragments(boolean parseFragments) {
0366: _parseFragments = parseFragments;
0367: }
0368:
0369: /**
0370: * {@inheritDoc}
0371: */
0372: public boolean getParseFragments() {
0373: return _parseFragments;
0374: }
0375:
0376: /**
0377: * {@inheritDoc}
0378: */
0379: public void setForceStreamClose(boolean needForceStreamClose) {
0380: _needForceStreamClose = needForceStreamClose;
0381: }
0382:
0383: /**
0384: * {@inheritDoc}
0385: */
0386: public boolean getForceStreamClose() {
0387: return _needForceStreamClose;
0388: }
0389:
0390: // End FastInfosetParser interface
0391:
0392: /**
0393: * Reset the decoder for reuse decoding another XML infoset.
0394: */
0395: public void reset() {
0396: _terminate = _doubleTerminate = false;
0397: }
0398:
0399: /**
0400: * Set the ParserVocabulary to be used for decoding.
0401: *
0402: * @param v the vocabulary to be used for decoding.
0403: */
0404: public void setVocabulary(ParserVocabulary v) {
0405: _v = v;
0406: _prefixTable = _v.prefix;
0407: _elementNameTable = _v.elementName;
0408: _attributeNameTable = _v.attributeName;
0409: _characterContentChunkTable = _v.characterContentChunk;
0410: _attributeValueTable = _v.attributeValue;
0411: _vIsInternal = false;
0412: }
0413:
0414: /**
0415: * Set the InputStream to decode the fast infoset document.
0416: *
0417: * @param s the InputStream where the fast infoset document is decoded from.
0418: */
0419: public void setInputStream(InputStream s) {
0420: _s = s;
0421: _octetBufferOffset = 0;
0422: _octetBufferEnd = 0;
0423: if (_vIsInternal == true) {
0424: _v.clear();
0425: }
0426: }
0427:
0428: protected final void decodeDII() throws FastInfosetException,
0429: IOException {
0430: final int b = read();
0431: if (b == EncodingConstants.DOCUMENT_INITIAL_VOCABULARY_FLAG) {
0432: decodeInitialVocabulary();
0433: } else if (b != 0) {
0434: throw new IOException(CommonResourceBundle.getInstance()
0435: .getString("message.optinalValues"));
0436: }
0437: }
0438:
0439: protected final void decodeAdditionalData()
0440: throws FastInfosetException, IOException {
0441: for (int i = 0; i < decodeNumberOfItemsOfSequence(); i++) {
0442: String URI = decodeNonEmptyOctetStringOnSecondBitAsUtf8String();
0443:
0444: decodeNonEmptyOctetStringLengthOnSecondBit();
0445: ensureOctetBufferSize();
0446: _octetBufferStart = _octetBufferOffset;
0447: _octetBufferOffset += _octetBufferLength;
0448: }
0449: }
0450:
0451: protected final void decodeInitialVocabulary()
0452: throws FastInfosetException, IOException {
0453: // First 5 optionals of 13 bit optional field
0454: int b = read();
0455: // Next 8 optionals of 13 bit optional field
0456: int b2 = read();
0457:
0458: // Optimize for the most common case
0459: if (b == EncodingConstants.INITIAL_VOCABULARY_EXTERNAL_VOCABULARY_FLAG
0460: && b2 == 0) {
0461: decodeExternalVocabularyURI();
0462: return;
0463: }
0464:
0465: if ((b & EncodingConstants.INITIAL_VOCABULARY_EXTERNAL_VOCABULARY_FLAG) > 0) {
0466: decodeExternalVocabularyURI();
0467: }
0468:
0469: if ((b & EncodingConstants.INITIAL_VOCABULARY_RESTRICTED_ALPHABETS_FLAG) > 0) {
0470: decodeTableItems(_v.restrictedAlphabet);
0471: }
0472:
0473: if ((b & EncodingConstants.INITIAL_VOCABULARY_ENCODING_ALGORITHMS_FLAG) > 0) {
0474: decodeTableItems(_v.encodingAlgorithm);
0475: }
0476:
0477: if ((b & EncodingConstants.INITIAL_VOCABULARY_PREFIXES_FLAG) > 0) {
0478: decodeTableItems(_v.prefix);
0479: }
0480:
0481: if ((b & EncodingConstants.INITIAL_VOCABULARY_NAMESPACE_NAMES_FLAG) > 0) {
0482: decodeTableItems(_v.namespaceName);
0483: }
0484:
0485: if ((b2 & EncodingConstants.INITIAL_VOCABULARY_LOCAL_NAMES_FLAG) > 0) {
0486: decodeTableItems(_v.localName);
0487: }
0488:
0489: if ((b2 & EncodingConstants.INITIAL_VOCABULARY_OTHER_NCNAMES_FLAG) > 0) {
0490: decodeTableItems(_v.otherNCName);
0491: }
0492:
0493: if ((b2 & EncodingConstants.INITIAL_VOCABULARY_OTHER_URIS_FLAG) > 0) {
0494: decodeTableItems(_v.otherURI);
0495: }
0496:
0497: if ((b2 & EncodingConstants.INITIAL_VOCABULARY_ATTRIBUTE_VALUES_FLAG) > 0) {
0498: decodeTableItems(_v.attributeValue);
0499: }
0500:
0501: if ((b2 & EncodingConstants.INITIAL_VOCABULARY_CONTENT_CHARACTER_CHUNKS_FLAG) > 0) {
0502: decodeTableItems(_v.characterContentChunk);
0503: }
0504:
0505: if ((b2 & EncodingConstants.INITIAL_VOCABULARY_OTHER_STRINGS_FLAG) > 0) {
0506: decodeTableItems(_v.otherString);
0507: }
0508:
0509: if ((b2 & EncodingConstants.INITIAL_VOCABULARY_ELEMENT_NAME_SURROGATES_FLAG) > 0) {
0510: decodeTableItems(_v.elementName, false);
0511: }
0512:
0513: if ((b2 & EncodingConstants.INITIAL_VOCABULARY_ATTRIBUTE_NAME_SURROGATES_FLAG) > 0) {
0514: decodeTableItems(_v.attributeName, true);
0515: }
0516: }
0517:
0518: private void decodeExternalVocabularyURI()
0519: throws FastInfosetException, IOException {
0520: if (_externalVocabularies == null) {
0521: throw new IOException(CommonResourceBundle.getInstance()
0522: .getString("message.noExternalVocabularies"));
0523: }
0524:
0525: String externalVocabularyURI = decodeNonEmptyOctetStringOnSecondBitAsUtf8String();
0526:
0527: Object o = _externalVocabularies.get(externalVocabularyURI);
0528: if (o instanceof ParserVocabulary) {
0529: _v.setReferencedVocabulary(externalVocabularyURI,
0530: (ParserVocabulary) o, false);
0531: } else if (o instanceof org.jvnet.fastinfoset.ExternalVocabulary) {
0532: org.jvnet.fastinfoset.ExternalVocabulary v = (org.jvnet.fastinfoset.ExternalVocabulary) o;
0533: ParserVocabulary pv = new ParserVocabulary(v.vocabulary);
0534:
0535: _externalVocabularies.put(externalVocabularyURI, pv);
0536: _v
0537: .setReferencedVocabulary(externalVocabularyURI, pv,
0538: false);
0539: } else {
0540: throw new FastInfosetException(CommonResourceBundle
0541: .getInstance().getString(
0542: "message.externalVocabularyNotRegistered",
0543: new Object[] { externalVocabularyURI }));
0544: }
0545: }
0546:
0547: private void decodeTableItems(StringArray array)
0548: throws FastInfosetException, IOException {
0549: for (int i = 0; i < decodeNumberOfItemsOfSequence(); i++) {
0550: array
0551: .add(decodeNonEmptyOctetStringOnSecondBitAsUtf8String());
0552: }
0553: }
0554:
0555: private void decodeTableItems(PrefixArray array)
0556: throws FastInfosetException, IOException {
0557: for (int i = 0; i < decodeNumberOfItemsOfSequence(); i++) {
0558: array
0559: .add(decodeNonEmptyOctetStringOnSecondBitAsUtf8String());
0560: }
0561: }
0562:
0563: private void decodeTableItems(ContiguousCharArrayArray array)
0564: throws FastInfosetException, IOException {
0565: for (int i = 0; i < decodeNumberOfItemsOfSequence(); i++) {
0566: switch (decodeNonIdentifyingStringOnFirstBit()) {
0567: case NISTRING_STRING:
0568: array.add(_charBuffer, _charBufferLength);
0569: break;
0570: default:
0571: throw new FastInfosetException(CommonResourceBundle
0572: .getInstance()
0573: .getString("message.illegalState"));
0574: }
0575: }
0576: }
0577:
0578: private void decodeTableItems(CharArrayArray array)
0579: throws FastInfosetException, IOException {
0580: for (int i = 0; i < decodeNumberOfItemsOfSequence(); i++) {
0581: switch (decodeNonIdentifyingStringOnFirstBit()) {
0582: case NISTRING_STRING:
0583: array.add(new CharArray(_charBuffer, 0,
0584: _charBufferLength, true));
0585: break;
0586: default:
0587: throw new FastInfosetException(CommonResourceBundle
0588: .getInstance()
0589: .getString("message.illegalState"));
0590: }
0591: }
0592: }
0593:
0594: private void decodeTableItems(QualifiedNameArray array,
0595: boolean isAttribute) throws FastInfosetException,
0596: IOException {
0597: for (int i = 0; i < decodeNumberOfItemsOfSequence(); i++) {
0598: final int b = read();
0599:
0600: String prefix = "";
0601: int prefixIndex = -1;
0602: if ((b & EncodingConstants.NAME_SURROGATE_PREFIX_FLAG) > 0) {
0603: prefixIndex = decodeIntegerIndexOnSecondBit();
0604: prefix = _v.prefix.get(prefixIndex);
0605: }
0606:
0607: String namespaceName = "";
0608: int namespaceNameIndex = -1;
0609: if ((b & EncodingConstants.NAME_SURROGATE_NAME_FLAG) > 0) {
0610: namespaceNameIndex = decodeIntegerIndexOnSecondBit();
0611: namespaceName = _v.prefix.get(prefixIndex);
0612: }
0613:
0614: if (namespaceName == "" && prefix != "") {
0615: throw new FastInfosetException(CommonResourceBundle
0616: .getInstance().getString(
0617: "message.missingNamespace"));
0618: }
0619:
0620: final int localNameIndex = decodeIntegerIndexOnSecondBit();
0621: final String localName = _v.localName.get(localNameIndex);
0622:
0623: QualifiedName qualifiedName = new QualifiedName(prefix,
0624: namespaceName, localName, prefixIndex,
0625: namespaceNameIndex, localNameIndex, _charBuffer);
0626: if (isAttribute) {
0627: qualifiedName
0628: .createAttributeValues(_duplicateAttributeVerifier.MAP_SIZE);
0629: }
0630: array.add(qualifiedName);
0631: }
0632: }
0633:
0634: private int decodeNumberOfItemsOfSequence() throws IOException {
0635: final int b = read();
0636: if (b < 128) {
0637: return b;
0638: } else {
0639: return ((b & 0x0F) << 16) | (read() << 8) | read();
0640: }
0641: }
0642:
0643: protected final void decodeNotations() throws FastInfosetException,
0644: IOException {
0645: if (_notations == null) {
0646: _notations = new ArrayList();
0647: } else {
0648: _notations.clear();
0649: }
0650:
0651: int b = read();
0652: while ((b & EncodingConstants.NOTATIONS_MASK) == EncodingConstants.NOTATIONS) {
0653: String name = decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherNCName);
0654:
0655: String system_identifier = ((_b & EncodingConstants.NOTATIONS_SYSTEM_IDENTIFIER_FLAG) > 0) ? decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherURI)
0656: : "";
0657: String public_identifier = ((_b & EncodingConstants.NOTATIONS_PUBLIC_IDENTIFIER_FLAG) > 0) ? decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherURI)
0658: : "";
0659:
0660: Notation notation = new Notation(name, system_identifier,
0661: public_identifier);
0662: _notations.add(notation);
0663:
0664: b = read();
0665: }
0666: if (b != EncodingConstants.TERMINATOR) {
0667: throw new FastInfosetException(CommonResourceBundle
0668: .getInstance().getString(
0669: "message.IIsNotTerminatedCorrectly"));
0670: }
0671: }
0672:
0673: protected final void decodeUnparsedEntities()
0674: throws FastInfosetException, IOException {
0675: if (_unparsedEntities == null) {
0676: _unparsedEntities = new ArrayList();
0677: } else {
0678: _unparsedEntities.clear();
0679: }
0680:
0681: int b = read();
0682: while ((b & EncodingConstants.UNPARSED_ENTITIES_MASK) == EncodingConstants.UNPARSED_ENTITIES) {
0683: String name = decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherNCName);
0684: String system_identifier = decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherURI);
0685:
0686: String public_identifier = ((_b & EncodingConstants.UNPARSED_ENTITIES_PUBLIC_IDENTIFIER_FLAG) > 0) ? decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherURI)
0687: : "";
0688:
0689: String notation_name = decodeIdentifyingNonEmptyStringOnFirstBit(_v.otherNCName);
0690:
0691: UnparsedEntity unparsedEntity = new UnparsedEntity(name,
0692: system_identifier, public_identifier, notation_name);
0693: _unparsedEntities.add(unparsedEntity);
0694:
0695: b = read();
0696: }
0697: if (b != EncodingConstants.TERMINATOR) {
0698: throw new FastInfosetException(CommonResourceBundle
0699: .getInstance()
0700: .getString("message.unparsedEntities"));
0701: }
0702: }
0703:
0704: protected final String decodeCharacterEncodingScheme()
0705: throws FastInfosetException, IOException {
0706: return decodeNonEmptyOctetStringOnSecondBitAsUtf8String();
0707: }
0708:
0709: protected final String decodeVersion() throws FastInfosetException,
0710: IOException {
0711: switch (decodeNonIdentifyingStringOnFirstBit()) {
0712: case NISTRING_STRING:
0713: final String data = new String(_charBuffer, 0,
0714: _charBufferLength);
0715: if (_addToTable) {
0716: _v.otherString.add(new CharArrayString(data));
0717: }
0718: return data;
0719: case NISTRING_ENCODING_ALGORITHM:
0720: throw new FastInfosetException(CommonResourceBundle
0721: .getInstance().getString(
0722: "message.decodingNotSupported"));
0723: case NISTRING_INDEX:
0724: return _v.otherString.get(_integer).toString();
0725: case NISTRING_EMPTY_STRING:
0726: default:
0727: return "";
0728: }
0729: }
0730:
0731: protected final QualifiedName decodeEIIIndexMedium()
0732: throws FastInfosetException, IOException {
0733: final int i = (((_b & EncodingConstants.INTEGER_3RD_BIT_MEDIUM_MASK) << 8) | read())
0734: + EncodingConstants.INTEGER_3RD_BIT_SMALL_LIMIT;
0735: return _v.elementName._array[i];
0736: }
0737:
0738: protected final QualifiedName decodeEIIIndexLarge()
0739: throws FastInfosetException, IOException {
0740: int i;
0741: if ((_b & EncodingConstants.INTEGER_3RD_BIT_LARGE_LARGE_FLAG) == 0x20) {
0742: // EII large index
0743: i = (((_b & EncodingConstants.INTEGER_3RD_BIT_LARGE_MASK) << 16)
0744: | (read() << 8) | read())
0745: + EncodingConstants.INTEGER_3RD_BIT_MEDIUM_LIMIT;
0746: } else {
0747: // EII large large index
0748: i = (((read() & EncodingConstants.INTEGER_3RD_BIT_LARGE_LARGE_MASK) << 16)
0749: | (read() << 8) | read())
0750: + EncodingConstants.INTEGER_3RD_BIT_LARGE_LIMIT;
0751: }
0752: return _v.elementName._array[i];
0753: }
0754:
0755: protected final QualifiedName decodeLiteralQualifiedName(int state,
0756: QualifiedName q) throws FastInfosetException, IOException {
0757: if (q == null)
0758: q = new QualifiedName();
0759: switch (state) {
0760: // no prefix, no namespace
0761: case 0:
0762: return q
0763: .set(
0764: "",
0765: "",
0766: decodeIdentifyingNonEmptyStringOnFirstBit(_v.localName),
0767: -1, -1, _identifier, null);
0768: // no prefix, namespace
0769: case 1:
0770: return q
0771: .set(
0772: "",
0773: decodeIdentifyingNonEmptyStringIndexOnFirstBitAsNamespaceName(false),
0774: decodeIdentifyingNonEmptyStringOnFirstBit(_v.localName),
0775: -1, _namespaceNameIndex, _identifier, null);
0776: // prefix, no namespace
0777: case 2:
0778: throw new FastInfosetException(CommonResourceBundle
0779: .getInstance().getString(
0780: "message.qNameMissingNamespaceName"));
0781: // prefix, namespace
0782: case 3:
0783: return q
0784: .set(
0785: decodeIdentifyingNonEmptyStringIndexOnFirstBitAsPrefix(true),
0786: decodeIdentifyingNonEmptyStringIndexOnFirstBitAsNamespaceName(true),
0787: decodeIdentifyingNonEmptyStringOnFirstBit(_v.localName),
0788: _prefixIndex, _namespaceNameIndex,
0789: _identifier, _charBuffer);
0790: default:
0791: throw new FastInfosetException(CommonResourceBundle
0792: .getInstance().getString("message.decodingEII"));
0793: }
0794: }
0795:
0796: protected static final int NISTRING_STRING = 0;
0797: protected static final int NISTRING_INDEX = 1;
0798: protected static final int NISTRING_ENCODING_ALGORITHM = 2;
0799: protected static final int NISTRING_EMPTY_STRING = 3;
0800:
0801: /*
0802: * C.14
0803: * decodeNonIdentifyingStringOnFirstBit
0804: */
0805: protected final int decodeNonIdentifyingStringOnFirstBit()
0806: throws FastInfosetException, IOException {
0807: final int b = read();
0808: switch (DecoderStateTables.NISTRING[b]) {
0809: case DecoderStateTables.NISTRING_UTF8_SMALL_LENGTH:
0810: _addToTable = (b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0;
0811: _octetBufferLength = (b & EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_MASK) + 1;
0812: decodeUtf8StringAsCharBuffer();
0813: return NISTRING_STRING;
0814: case DecoderStateTables.NISTRING_UTF8_MEDIUM_LENGTH:
0815: _addToTable = (b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0;
0816: _octetBufferLength = read()
0817: + EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_LIMIT;
0818: decodeUtf8StringAsCharBuffer();
0819: return NISTRING_STRING;
0820: case DecoderStateTables.NISTRING_UTF8_LARGE_LENGTH: {
0821: _addToTable = (b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0;
0822: final int length = (read() << 24) | (read() << 16)
0823: | (read() << 8) | read();
0824: _octetBufferLength = length
0825: + EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_LIMIT;
0826: decodeUtf8StringAsCharBuffer();
0827: return NISTRING_STRING;
0828: }
0829: case DecoderStateTables.NISTRING_UTF16_SMALL_LENGTH:
0830: _addToTable = (b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0;
0831: _octetBufferLength = (b & EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_MASK) + 1;
0832: decodeUtf16StringAsCharBuffer();
0833: return NISTRING_STRING;
0834: case DecoderStateTables.NISTRING_UTF16_MEDIUM_LENGTH:
0835: _addToTable = (b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0;
0836: _octetBufferLength = read()
0837: + EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_LIMIT;
0838: decodeUtf16StringAsCharBuffer();
0839: return NISTRING_STRING;
0840: case DecoderStateTables.NISTRING_UTF16_LARGE_LENGTH: {
0841: _addToTable = (b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0;
0842: final int length = (read() << 24) | (read() << 16)
0843: | (read() << 8) | read();
0844: _octetBufferLength = length
0845: + EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_LIMIT;
0846: decodeUtf16StringAsCharBuffer();
0847: return NISTRING_STRING;
0848: }
0849: case DecoderStateTables.NISTRING_RA: {
0850: _addToTable = (b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0;
0851: // Decode resitricted alphabet integer
0852: _identifier = (b & 0x0F) << 4;
0853: final int b2 = read();
0854: _identifier |= (b2 & 0xF0) >> 4;
0855:
0856: decodeOctetsOnFifthBitOfNonIdentifyingStringOnFirstBit(b2);
0857:
0858: decodeRestrictedAlphabetAsCharBuffer();
0859: return NISTRING_STRING;
0860: }
0861: case DecoderStateTables.NISTRING_EA: {
0862: _addToTable = (b & EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG) > 0;
0863: // Decode encoding algorithm integer
0864: _identifier = (b & 0x0F) << 4;
0865: final int b2 = read();
0866: _identifier |= (b2 & 0xF0) >> 4;
0867:
0868: decodeOctetsOnFifthBitOfNonIdentifyingStringOnFirstBit(b2);
0869: return NISTRING_ENCODING_ALGORITHM;
0870: }
0871: case DecoderStateTables.NISTRING_INDEX_SMALL:
0872: _integer = b & EncodingConstants.INTEGER_2ND_BIT_SMALL_MASK;
0873: return NISTRING_INDEX;
0874: case DecoderStateTables.NISTRING_INDEX_MEDIUM:
0875: _integer = (((b & EncodingConstants.INTEGER_2ND_BIT_MEDIUM_MASK) << 8) | read())
0876: + EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
0877: return NISTRING_INDEX;
0878: case DecoderStateTables.NISTRING_INDEX_LARGE:
0879: _integer = (((b & EncodingConstants.INTEGER_2ND_BIT_LARGE_MASK) << 16)
0880: | (read() << 8) | read())
0881: + EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
0882: return NISTRING_INDEX;
0883: case DecoderStateTables.NISTRING_EMPTY:
0884: return NISTRING_EMPTY_STRING;
0885: default:
0886: throw new FastInfosetException(CommonResourceBundle
0887: .getInstance().getString(
0888: "message.decodingNonIdentifyingString"));
0889: }
0890: }
0891:
0892: protected final void decodeOctetsOnFifthBitOfNonIdentifyingStringOnFirstBit(
0893: int b) throws FastInfosetException, IOException {
0894: // Remove top 4 bits of restricted alphabet or encoding algorithm integer
0895: b &= 0x0F;
0896: // Reuse UTF8 length states
0897: switch (DecoderStateTables.NISTRING[b]) {
0898: case DecoderStateTables.NISTRING_UTF8_SMALL_LENGTH:
0899: _octetBufferLength = b + 1;
0900: break;
0901: case DecoderStateTables.NISTRING_UTF8_MEDIUM_LENGTH:
0902: _octetBufferLength = read()
0903: + EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_LIMIT;
0904: break;
0905: case DecoderStateTables.NISTRING_UTF8_LARGE_LENGTH:
0906: final int length = (read() << 24) | (read() << 16)
0907: | (read() << 8) | read();
0908: _octetBufferLength = length
0909: + EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_LIMIT;
0910: break;
0911: default:
0912: throw new FastInfosetException(CommonResourceBundle
0913: .getInstance().getString("message.decodingOctets"));
0914: }
0915: ensureOctetBufferSize();
0916: _octetBufferStart = _octetBufferOffset;
0917: _octetBufferOffset += _octetBufferLength;
0918: }
0919:
0920: protected final void decodeOctetsOnSeventhBitOfNonIdentifyingStringOnThirdBit(
0921: int b) throws FastInfosetException, IOException {
0922: // Remove top 6 bits of restricted alphabet or encoding algorithm integer
0923: switch (b & 0x03) {
0924: // Small length
0925: case 0:
0926: _octetBufferLength = 1;
0927: break;
0928: // Small length
0929: case 1:
0930: _octetBufferLength = 2;
0931: break;
0932: // Medium length
0933: case 2:
0934: _octetBufferLength = read()
0935: + EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_LIMIT;
0936: break;
0937: // Large length
0938: case 3:
0939: _octetBufferLength = (read() << 24) | (read() << 16)
0940: | (read() << 8) | read();
0941: _octetBufferLength += EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_LIMIT;
0942: break;
0943: }
0944:
0945: ensureOctetBufferSize();
0946: _octetBufferStart = _octetBufferOffset;
0947: _octetBufferOffset += _octetBufferLength;
0948: }
0949:
0950: /*
0951: * C.13
0952: */
0953: protected final String decodeIdentifyingNonEmptyStringOnFirstBit(
0954: StringArray table) throws FastInfosetException, IOException {
0955: final int b = read();
0956: switch (DecoderStateTables.ISTRING[b]) {
0957: case DecoderStateTables.ISTRING_SMALL_LENGTH: {
0958: _octetBufferLength = b + 1;
0959: final String s = (_stringInterning) ? decodeUtf8StringAsString()
0960: .intern()
0961: : decodeUtf8StringAsString();
0962: _identifier = table.add(s) - 1;
0963: return s;
0964: }
0965: case DecoderStateTables.ISTRING_MEDIUM_LENGTH: {
0966: _octetBufferLength = read()
0967: + EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_SMALL_LIMIT;
0968: final String s = (_stringInterning) ? decodeUtf8StringAsString()
0969: .intern()
0970: : decodeUtf8StringAsString();
0971: _identifier = table.add(s) - 1;
0972: return s;
0973: }
0974: case DecoderStateTables.ISTRING_LARGE_LENGTH: {
0975: final int length = (read() << 24) | (read() << 16)
0976: | (read() << 8) | read();
0977: _octetBufferLength = length
0978: + EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_LIMIT;
0979: final String s = (_stringInterning) ? decodeUtf8StringAsString()
0980: .intern()
0981: : decodeUtf8StringAsString();
0982: _identifier = table.add(s) - 1;
0983: return s;
0984: }
0985: case DecoderStateTables.ISTRING_INDEX_SMALL:
0986: _identifier = b
0987: & EncodingConstants.INTEGER_2ND_BIT_SMALL_MASK;
0988: return table._array[_identifier];
0989: case DecoderStateTables.ISTRING_INDEX_MEDIUM:
0990: _identifier = (((b & EncodingConstants.INTEGER_2ND_BIT_MEDIUM_MASK) << 8) | read())
0991: + EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
0992: return table._array[_identifier];
0993: case DecoderStateTables.ISTRING_INDEX_LARGE:
0994: _identifier = (((b & EncodingConstants.INTEGER_2ND_BIT_LARGE_MASK) << 16)
0995: | (read() << 8) | read())
0996: + EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
0997: return table._array[_identifier];
0998: default:
0999: throw new FastInfosetException(CommonResourceBundle
1000: .getInstance().getString(
1001: "message.decodingIdentifyingString"));
1002: }
1003: }
1004:
1005: protected int _prefixIndex;
1006:
1007: /*
1008: * C.13
1009: */
1010: protected final String decodeIdentifyingNonEmptyStringOnFirstBitAsPrefix(
1011: boolean namespaceNamePresent) throws FastInfosetException,
1012: IOException {
1013: final int b = read();
1014: switch (DecoderStateTables.ISTRING_PREFIX_NAMESPACE[b]) {
1015: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_LENGTH_3: {
1016: _octetBufferLength = EncodingConstants.XML_NAMESPACE_PREFIX_LENGTH;
1017: decodeUtf8StringAsCharBuffer();
1018:
1019: if (_charBuffer[0] == 'x' && _charBuffer[1] == 'm'
1020: && _charBuffer[2] == 'l') {
1021: throw new FastInfosetException(CommonResourceBundle
1022: .getInstance().getString(
1023: "message.prefixIllegal"));
1024: }
1025:
1026: final String s = (_stringInterning) ? new String(
1027: _charBuffer, 0, _charBufferLength).intern()
1028: : new String(_charBuffer, 0, _charBufferLength);
1029: _prefixIndex = _v.prefix.add(s);
1030: return s;
1031: }
1032: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_LENGTH_5: {
1033: _octetBufferLength = EncodingConstants.XMLNS_NAMESPACE_PREFIX_LENGTH;
1034: decodeUtf8StringAsCharBuffer();
1035:
1036: if (_charBuffer[0] == 'x' && _charBuffer[1] == 'm'
1037: && _charBuffer[2] == 'l' && _charBuffer[3] == 'n'
1038: && _charBuffer[4] == 's') {
1039: throw new FastInfosetException(CommonResourceBundle
1040: .getInstance().getString("message.xmlns"));
1041: }
1042:
1043: final String s = (_stringInterning) ? new String(
1044: _charBuffer, 0, _charBufferLength).intern()
1045: : new String(_charBuffer, 0, _charBufferLength);
1046: _prefixIndex = _v.prefix.add(s);
1047: return s;
1048: }
1049: case DecoderStateTables.ISTRING_SMALL_LENGTH:
1050: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_LENGTH_29:
1051: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_LENGTH_36: {
1052: _octetBufferLength = b + 1;
1053: final String s = (_stringInterning) ? decodeUtf8StringAsString()
1054: .intern()
1055: : decodeUtf8StringAsString();
1056: _prefixIndex = _v.prefix.add(s);
1057: return s;
1058: }
1059: case DecoderStateTables.ISTRING_MEDIUM_LENGTH: {
1060: _octetBufferLength = read()
1061: + EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_SMALL_LIMIT;
1062: final String s = (_stringInterning) ? decodeUtf8StringAsString()
1063: .intern()
1064: : decodeUtf8StringAsString();
1065: _prefixIndex = _v.prefix.add(s);
1066: return s;
1067: }
1068: case DecoderStateTables.ISTRING_LARGE_LENGTH: {
1069: final int length = (read() << 24) | (read() << 16)
1070: | (read() << 8) | read();
1071: _octetBufferLength = length
1072: + EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_LIMIT;
1073: final String s = (_stringInterning) ? decodeUtf8StringAsString()
1074: .intern()
1075: : decodeUtf8StringAsString();
1076: _prefixIndex = _v.prefix.add(s);
1077: return s;
1078: }
1079: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_INDEX_ZERO:
1080: if (namespaceNamePresent) {
1081: _prefixIndex = 0;
1082: // Peak at next byte and check the index of the XML namespace name
1083: if (DecoderStateTables.ISTRING_PREFIX_NAMESPACE[peek()] != DecoderStateTables.ISTRING_PREFIX_NAMESPACE_INDEX_ZERO) {
1084: throw new FastInfosetException(CommonResourceBundle
1085: .getInstance().getString(
1086: "message.wrongNamespaceName"));
1087: }
1088: return EncodingConstants.XML_NAMESPACE_PREFIX;
1089: } else {
1090: throw new FastInfosetException(CommonResourceBundle
1091: .getInstance().getString(
1092: "message.missingNamespaceName"));
1093: }
1094: case DecoderStateTables.ISTRING_INDEX_SMALL:
1095: _prefixIndex = b
1096: & EncodingConstants.INTEGER_2ND_BIT_SMALL_MASK;
1097: return _v.prefix._array[_prefixIndex - 1];
1098: case DecoderStateTables.ISTRING_INDEX_MEDIUM:
1099: _prefixIndex = (((b & EncodingConstants.INTEGER_2ND_BIT_MEDIUM_MASK) << 8) | read())
1100: + EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
1101: return _v.prefix._array[_prefixIndex - 1];
1102: case DecoderStateTables.ISTRING_INDEX_LARGE:
1103: _prefixIndex = (((b & EncodingConstants.INTEGER_2ND_BIT_LARGE_MASK) << 16)
1104: | (read() << 8) | read())
1105: + EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
1106: return _v.prefix._array[_prefixIndex - 1];
1107: default:
1108: throw new FastInfosetException(
1109: CommonResourceBundle
1110: .getInstance()
1111: .getString(
1112: "message.decodingIdentifyingStringForPrefix"));
1113: }
1114: }
1115:
1116: /*
1117: * C.13
1118: */
1119: protected final String decodeIdentifyingNonEmptyStringIndexOnFirstBitAsPrefix(
1120: boolean namespaceNamePresent) throws FastInfosetException,
1121: IOException {
1122: final int b = read();
1123: switch (DecoderStateTables.ISTRING_PREFIX_NAMESPACE[b]) {
1124: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_INDEX_ZERO:
1125: if (namespaceNamePresent) {
1126: _prefixIndex = 0;
1127: // Peak at next byte and check the index of the XML namespace name
1128: if (DecoderStateTables.ISTRING_PREFIX_NAMESPACE[peek()] != DecoderStateTables.ISTRING_PREFIX_NAMESPACE_INDEX_ZERO) {
1129: throw new FastInfosetException(CommonResourceBundle
1130: .getInstance().getString(
1131: "message.wrongNamespaceName"));
1132: }
1133: return EncodingConstants.XML_NAMESPACE_PREFIX;
1134: } else {
1135: throw new FastInfosetException(CommonResourceBundle
1136: .getInstance().getString(
1137: "message.missingNamespaceName"));
1138: }
1139: case DecoderStateTables.ISTRING_INDEX_SMALL:
1140: _prefixIndex = b
1141: & EncodingConstants.INTEGER_2ND_BIT_SMALL_MASK;
1142: return _v.prefix._array[_prefixIndex - 1];
1143: case DecoderStateTables.ISTRING_INDEX_MEDIUM:
1144: _prefixIndex = (((b & EncodingConstants.INTEGER_2ND_BIT_MEDIUM_MASK) << 8) | read())
1145: + EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
1146: return _v.prefix._array[_prefixIndex - 1];
1147: case DecoderStateTables.ISTRING_INDEX_LARGE:
1148: _prefixIndex = (((b & EncodingConstants.INTEGER_2ND_BIT_LARGE_MASK) << 16)
1149: | (read() << 8) | read())
1150: + EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
1151: return _v.prefix._array[_prefixIndex - 1];
1152: default:
1153: throw new FastInfosetException(
1154: CommonResourceBundle
1155: .getInstance()
1156: .getString(
1157: "message.decodingIdentifyingStringForPrefix"));
1158: }
1159: }
1160:
1161: protected int _namespaceNameIndex;
1162:
1163: /*
1164: * C.13
1165: */
1166: protected final String decodeIdentifyingNonEmptyStringOnFirstBitAsNamespaceName(
1167: boolean prefixPresent) throws FastInfosetException,
1168: IOException {
1169: final int b = read();
1170: switch (DecoderStateTables.ISTRING_PREFIX_NAMESPACE[b]) {
1171: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_LENGTH_3:
1172: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_LENGTH_5:
1173: case DecoderStateTables.ISTRING_SMALL_LENGTH: {
1174: _octetBufferLength = b + 1;
1175: final String s = (_stringInterning) ? decodeUtf8StringAsString()
1176: .intern()
1177: : decodeUtf8StringAsString();
1178: _namespaceNameIndex = _v.namespaceName.add(s);
1179: return s;
1180: }
1181: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_LENGTH_29: {
1182: _octetBufferLength = EncodingConstants.XMLNS_NAMESPACE_NAME_LENGTH;
1183: decodeUtf8StringAsCharBuffer();
1184:
1185: if (compareCharsWithCharBufferFromEndToStart(XMLNS_NAMESPACE_NAME_CHARS)) {
1186: throw new FastInfosetException(CommonResourceBundle
1187: .getInstance().getString(
1188: "message.xmlnsConnotBeBoundToPrefix"));
1189: }
1190:
1191: final String s = (_stringInterning) ? new String(
1192: _charBuffer, 0, _charBufferLength).intern()
1193: : new String(_charBuffer, 0, _charBufferLength);
1194: _namespaceNameIndex = _v.namespaceName.add(s);
1195: return s;
1196: }
1197: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_LENGTH_36: {
1198: _octetBufferLength = EncodingConstants.XML_NAMESPACE_NAME_LENGTH;
1199: decodeUtf8StringAsCharBuffer();
1200:
1201: if (compareCharsWithCharBufferFromEndToStart(XML_NAMESPACE_NAME_CHARS)) {
1202: throw new FastInfosetException(CommonResourceBundle
1203: .getInstance().getString(
1204: "message.illegalNamespaceName"));
1205: }
1206:
1207: final String s = (_stringInterning) ? new String(
1208: _charBuffer, 0, _charBufferLength).intern()
1209: : new String(_charBuffer, 0, _charBufferLength);
1210: _namespaceNameIndex = _v.namespaceName.add(s);
1211: return s;
1212: }
1213: case DecoderStateTables.ISTRING_MEDIUM_LENGTH: {
1214: _octetBufferLength = read()
1215: + EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_SMALL_LIMIT;
1216: final String s = (_stringInterning) ? decodeUtf8StringAsString()
1217: .intern()
1218: : decodeUtf8StringAsString();
1219: _namespaceNameIndex = _v.namespaceName.add(s);
1220: return s;
1221: }
1222: case DecoderStateTables.ISTRING_LARGE_LENGTH: {
1223: final int length = (read() << 24) | (read() << 16)
1224: | (read() << 8) | read();
1225: _octetBufferLength = length
1226: + EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_LIMIT;
1227: final String s = (_stringInterning) ? decodeUtf8StringAsString()
1228: .intern()
1229: : decodeUtf8StringAsString();
1230: _namespaceNameIndex = _v.namespaceName.add(s);
1231: return s;
1232: }
1233: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_INDEX_ZERO:
1234: if (prefixPresent) {
1235: _namespaceNameIndex = 0;
1236: return EncodingConstants.XML_NAMESPACE_NAME;
1237: } else {
1238: throw new FastInfosetException(CommonResourceBundle
1239: .getInstance().getString(
1240: "message.namespaceWithoutPrefix"));
1241: }
1242: case DecoderStateTables.ISTRING_INDEX_SMALL:
1243: _namespaceNameIndex = b
1244: & EncodingConstants.INTEGER_2ND_BIT_SMALL_MASK;
1245: return _v.namespaceName._array[_namespaceNameIndex - 1];
1246: case DecoderStateTables.ISTRING_INDEX_MEDIUM:
1247: _namespaceNameIndex = (((b & EncodingConstants.INTEGER_2ND_BIT_MEDIUM_MASK) << 8) | read())
1248: + EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
1249: return _v.namespaceName._array[_namespaceNameIndex - 1];
1250: case DecoderStateTables.ISTRING_INDEX_LARGE:
1251: _namespaceNameIndex = (((b & EncodingConstants.INTEGER_2ND_BIT_LARGE_MASK) << 16)
1252: | (read() << 8) | read())
1253: + EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
1254: return _v.namespaceName._array[_namespaceNameIndex - 1];
1255: default:
1256: throw new FastInfosetException(CommonResourceBundle
1257: .getInstance().getString(
1258: "message.decodingForNamespaceName"));
1259: }
1260: }
1261:
1262: /*
1263: * C.13
1264: */
1265: protected final String decodeIdentifyingNonEmptyStringIndexOnFirstBitAsNamespaceName(
1266: boolean prefixPresent) throws FastInfosetException,
1267: IOException {
1268: final int b = read();
1269: switch (DecoderStateTables.ISTRING_PREFIX_NAMESPACE[b]) {
1270: case DecoderStateTables.ISTRING_PREFIX_NAMESPACE_INDEX_ZERO:
1271: if (prefixPresent) {
1272: _namespaceNameIndex = 0;
1273: return EncodingConstants.XML_NAMESPACE_NAME;
1274: } else {
1275: throw new FastInfosetException(CommonResourceBundle
1276: .getInstance().getString(
1277: "message.namespaceWithoutPrefix"));
1278: }
1279: case DecoderStateTables.ISTRING_INDEX_SMALL:
1280: _namespaceNameIndex = b
1281: & EncodingConstants.INTEGER_2ND_BIT_SMALL_MASK;
1282: return _v.namespaceName._array[_namespaceNameIndex - 1];
1283: case DecoderStateTables.ISTRING_INDEX_MEDIUM:
1284: _namespaceNameIndex = (((b & EncodingConstants.INTEGER_2ND_BIT_MEDIUM_MASK) << 8) | read())
1285: + EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
1286: return _v.namespaceName._array[_namespaceNameIndex - 1];
1287: case DecoderStateTables.ISTRING_INDEX_LARGE:
1288: _namespaceNameIndex = (((b & EncodingConstants.INTEGER_2ND_BIT_LARGE_MASK) << 16)
1289: | (read() << 8) | read())
1290: + EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
1291: return _v.namespaceName._array[_namespaceNameIndex - 1];
1292: default:
1293: throw new FastInfosetException(CommonResourceBundle
1294: .getInstance().getString(
1295: "message.decodingForNamespaceName"));
1296: }
1297: }
1298:
1299: private boolean compareCharsWithCharBufferFromEndToStart(char[] c) {
1300: int i = _charBufferLength;
1301: while (--i >= 0) {
1302: if (c[i] != _charBuffer[i]) {
1303: return false;
1304: }
1305: }
1306: return true;
1307: }
1308:
1309: /*
1310: * C.22
1311: */
1312: protected final String decodeNonEmptyOctetStringOnSecondBitAsUtf8String()
1313: throws FastInfosetException, IOException {
1314: decodeNonEmptyOctetStringOnSecondBitAsUtf8CharArray();
1315: return new String(_charBuffer, 0, _charBufferLength);
1316: }
1317:
1318: /*
1319: * C.22
1320: */
1321: protected final void decodeNonEmptyOctetStringOnSecondBitAsUtf8CharArray()
1322: throws FastInfosetException, IOException {
1323: decodeNonEmptyOctetStringLengthOnSecondBit();
1324: decodeUtf8StringAsCharBuffer();
1325: }
1326:
1327: /*
1328: * C.22
1329: */
1330: protected final void decodeNonEmptyOctetStringLengthOnSecondBit()
1331: throws FastInfosetException, IOException {
1332: final int b = read();
1333: switch (DecoderStateTables.ISTRING[b]) {
1334: case DecoderStateTables.ISTRING_SMALL_LENGTH:
1335: _octetBufferLength = b + 1;
1336: break;
1337: case DecoderStateTables.ISTRING_MEDIUM_LENGTH:
1338: _octetBufferLength = read()
1339: + EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_SMALL_LIMIT;
1340: break;
1341: case DecoderStateTables.ISTRING_LARGE_LENGTH: {
1342: final int length = (read() << 24) | (read() << 16)
1343: | (read() << 8) | read();
1344: _octetBufferLength = length
1345: + EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_LIMIT;
1346: break;
1347: }
1348: case DecoderStateTables.ISTRING_INDEX_SMALL:
1349: case DecoderStateTables.ISTRING_INDEX_MEDIUM:
1350: case DecoderStateTables.ISTRING_INDEX_LARGE:
1351: default:
1352: throw new FastInfosetException(CommonResourceBundle
1353: .getInstance().getString(
1354: "message.decodingNonEmptyOctet"));
1355: }
1356: }
1357:
1358: /*
1359: * C.25
1360: */
1361: protected final int decodeIntegerIndexOnSecondBit()
1362: throws FastInfosetException, IOException {
1363: final int b = read();
1364: switch (DecoderStateTables.ISTRING[b]) {
1365: case DecoderStateTables.ISTRING_INDEX_SMALL:
1366: return b & EncodingConstants.INTEGER_2ND_BIT_SMALL_MASK;
1367: case DecoderStateTables.ISTRING_INDEX_MEDIUM:
1368: return (((b & EncodingConstants.INTEGER_2ND_BIT_MEDIUM_MASK) << 8) | read())
1369: + EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
1370: case DecoderStateTables.ISTRING_INDEX_LARGE:
1371: return (((b & EncodingConstants.INTEGER_2ND_BIT_LARGE_MASK) << 16)
1372: | (read() << 8) | read())
1373: + EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
1374: case DecoderStateTables.ISTRING_SMALL_LENGTH:
1375: case DecoderStateTables.ISTRING_MEDIUM_LENGTH:
1376: case DecoderStateTables.ISTRING_LARGE_LENGTH:
1377: default:
1378: throw new FastInfosetException(CommonResourceBundle
1379: .getInstance().getString(
1380: "message.decodingIndexOnSecondBit"));
1381: }
1382: }
1383:
1384: protected final void decodeHeader() throws FastInfosetException,
1385: IOException {
1386: if (!_isFastInfosetDocument()) {
1387: throw new FastInfosetException(CommonResourceBundle
1388: .getInstance().getString("message.notFIDocument"));
1389: }
1390: }
1391:
1392: protected final void decodeRestrictedAlphabetAsCharBuffer()
1393: throws FastInfosetException, IOException {
1394: if (_identifier <= EncodingConstants.RESTRICTED_ALPHABET_BUILTIN_END) {
1395: decodeFourBitAlphabetOctetsAsCharBuffer(BuiltInRestrictedAlphabets.table[_identifier]);
1396: // decodeAlphabetOctetsAsCharBuffer(BuiltInRestrictedAlphabets.table[_identifier]);
1397: } else if (_identifier >= EncodingConstants.RESTRICTED_ALPHABET_APPLICATION_START) {
1398: CharArray ca = _v.restrictedAlphabet
1399: .get(_identifier
1400: - EncodingConstants.RESTRICTED_ALPHABET_APPLICATION_START);
1401: if (ca == null) {
1402: throw new FastInfosetException(CommonResourceBundle
1403: .getInstance().getString(
1404: "message.alphabetNotPresent",
1405: new Object[] { Integer
1406: .valueOf(_identifier) }));
1407: }
1408: decodeAlphabetOctetsAsCharBuffer(ca.ch);
1409: } else {
1410: // Reserved built-in algorithms for future use
1411: // TODO should use sax property to decide if event will be
1412: // reported, allows for support through handler if required.
1413: throw new FastInfosetException(CommonResourceBundle
1414: .getInstance().getString(
1415: "message.alphabetIdentifiersReserved"));
1416: }
1417: }
1418:
1419: protected final String decodeRestrictedAlphabetAsString()
1420: throws FastInfosetException, IOException {
1421: decodeRestrictedAlphabetAsCharBuffer();
1422: return new String(_charBuffer, 0, _charBufferLength);
1423: }
1424:
1425: protected final String decodeRAOctetsAsString(
1426: char[] restrictedAlphabet) throws FastInfosetException,
1427: IOException {
1428: decodeAlphabetOctetsAsCharBuffer(restrictedAlphabet);
1429: return new String(_charBuffer, 0, _charBufferLength);
1430: }
1431:
1432: protected final void decodeFourBitAlphabetOctetsAsCharBuffer(
1433: char[] restrictedAlphabet) throws FastInfosetException,
1434: IOException {
1435: _charBufferLength = 0;
1436: final int characters = _octetBufferLength * 2;
1437: if (_charBuffer.length < characters) {
1438: _charBuffer = new char[characters];
1439: }
1440:
1441: int v = 0;
1442: for (int i = 0; i < _octetBufferLength - 1; i++) {
1443: v = _octetBuffer[_octetBufferStart++] & 0xFF;
1444: _charBuffer[_charBufferLength++] = restrictedAlphabet[v >> 4];
1445: _charBuffer[_charBufferLength++] = restrictedAlphabet[v & 0x0F];
1446: }
1447: v = _octetBuffer[_octetBufferStart++] & 0xFF;
1448: _charBuffer[_charBufferLength++] = restrictedAlphabet[v >> 4];
1449: v &= 0x0F;
1450: if (v != 0x0F) {
1451: _charBuffer[_charBufferLength++] = restrictedAlphabet[v & 0x0F];
1452: }
1453: }
1454:
1455: protected final void decodeAlphabetOctetsAsCharBuffer(
1456: char[] restrictedAlphabet) throws FastInfosetException,
1457: IOException {
1458: if (restrictedAlphabet.length < 2) {
1459: throw new IllegalArgumentException(CommonResourceBundle
1460: .getInstance().getString(
1461: "message.alphabetMustContain2orMoreChars"));
1462: }
1463:
1464: int bitsPerCharacter = 1;
1465: while ((1 << bitsPerCharacter) <= restrictedAlphabet.length) {
1466: bitsPerCharacter++;
1467: }
1468: final int terminatingValue = (1 << bitsPerCharacter) - 1;
1469:
1470: int characters = (_octetBufferLength << 3) / bitsPerCharacter;
1471: if (characters == 0) {
1472: throw new IOException("");
1473: }
1474:
1475: _charBufferLength = 0;
1476: if (_charBuffer.length < characters) {
1477: _charBuffer = new char[characters];
1478: }
1479:
1480: resetBits();
1481: for (int i = 0; i < characters; i++) {
1482: int value = readBits(bitsPerCharacter);
1483: if (bitsPerCharacter < 8 && value == terminatingValue) {
1484: int octetPosition = (i * bitsPerCharacter) >>> 3;
1485: if (octetPosition != _octetBufferLength - 1) {
1486: throw new FastInfosetException(
1487: CommonResourceBundle
1488: .getInstance()
1489: .getString(
1490: "message.alphabetIncorrectlyTerminated"));
1491: }
1492: break;
1493: }
1494: _charBuffer[_charBufferLength++] = restrictedAlphabet[value];
1495: }
1496: }
1497:
/**
 * Number of bits of the current octet ({@code _b}) that
 * {@code readBits} has not yet consumed; 0 forces the next call to load
 * a fresh octet.
 */
private int _bitsLeftInOctet;

/**
 * Resets the bit reader so that the next {@code readBits} call starts
 * on a fresh octet taken from the octet buffer.
 */
private void resetBits() {
    _bitsLeftInOctet = 0;
}
1503:
1504: private int readBits(int bits) throws IOException {
1505: int value = 0;
1506: while (bits > 0) {
1507: if (_bitsLeftInOctet == 0) {
1508: _b = _octetBuffer[_octetBufferStart++] & 0xFF;
1509: _bitsLeftInOctet = 8;
1510: }
1511: int bit = ((_b & (1 << --_bitsLeftInOctet)) > 0) ? 1 : 0;
1512: value |= (bit << --bits);
1513: }
1514:
1515: return value;
1516: }
1517:
1518: protected final void decodeUtf8StringAsCharBuffer()
1519: throws IOException {
1520: ensureOctetBufferSize();
1521: decodeUtf8StringIntoCharBuffer();
1522: }
1523:
1524: protected final void decodeUtf8StringAsCharBuffer(char[] ch,
1525: int offset) throws IOException {
1526: ensureOctetBufferSize();
1527: decodeUtf8StringIntoCharBuffer(ch, offset);
1528: }
1529:
1530: protected final String decodeUtf8StringAsString()
1531: throws IOException {
1532: decodeUtf8StringAsCharBuffer();
1533: return new String(_charBuffer, 0, _charBufferLength);
1534: }
1535:
1536: protected final void decodeUtf16StringAsCharBuffer()
1537: throws IOException {
1538: ensureOctetBufferSize();
1539: decodeUtf16StringIntoCharBuffer();
1540: }
1541:
1542: protected final String decodeUtf16StringAsString()
1543: throws IOException {
1544: decodeUtf16StringAsCharBuffer();
1545: return new String(_charBuffer, 0, _charBufferLength);
1546: }
1547:
1548: private void ensureOctetBufferSize() throws IOException {
1549: if (_octetBufferEnd < (_octetBufferOffset + _octetBufferLength)) {
1550: final int octetsInBuffer = _octetBufferEnd
1551: - _octetBufferOffset;
1552:
1553: if (_octetBuffer.length < _octetBufferLength) {
1554: // Length to read is too large, resize the buffer
1555: byte[] newOctetBuffer = new byte[_octetBufferLength];
1556: // Move partially read octets to the start of the buffer
1557: System.arraycopy(_octetBuffer, _octetBufferOffset,
1558: newOctetBuffer, 0, octetsInBuffer);
1559: _octetBuffer = newOctetBuffer;
1560: } else {
1561: // Move partially read octets to the start of the buffer
1562: System.arraycopy(_octetBuffer, _octetBufferOffset,
1563: _octetBuffer, 0, octetsInBuffer);
1564: }
1565: _octetBufferOffset = 0;
1566:
1567: // Read as many octets as possible to fill the buffer
1568: final int octetsRead = _s.read(_octetBuffer,
1569: octetsInBuffer, _octetBuffer.length
1570: - octetsInBuffer);
1571: if (octetsRead < 0) {
1572: throw new EOFException("Unexpeceted EOF");
1573: }
1574: _octetBufferEnd = octetsInBuffer + octetsRead;
1575:
1576: // Check if the number of octets that have been read is not enough
1577: // This can happen when underlying non-blocking is used to read
1578: if (_octetBufferEnd < _octetBufferLength) {
1579: repeatedRead();
1580: }
1581: }
1582: }
1583:
1584: private void repeatedRead() throws IOException {
1585: // Check if the number of octets that have been read is not enough
1586: while (_octetBufferEnd < _octetBufferLength) {
1587: // Read as many octets as possible to fill the buffer
1588: final int octetsRead = _s.read(_octetBuffer,
1589: _octetBufferEnd, _octetBuffer.length
1590: - _octetBufferEnd);
1591: if (octetsRead < 0) {
1592: throw new EOFException("Unexpeceted EOF");
1593: }
1594: _octetBufferEnd += octetsRead;
1595: }
1596: }
1597:
1598: protected final void decodeUtf8StringIntoCharBuffer()
1599: throws IOException {
1600: if (_charBuffer.length < _octetBufferLength) {
1601: _charBuffer = new char[_octetBufferLength];
1602: }
1603:
1604: _charBufferLength = 0;
1605: final int end = _octetBufferLength + _octetBufferOffset;
1606: int b1;
1607: while (end != _octetBufferOffset) {
1608: b1 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1609: if (DecoderStateTables.UTF8[b1] == DecoderStateTables.UTF8_ONE_BYTE) {
1610: _charBuffer[_charBufferLength++] = (char) b1;
1611: } else {
1612: decodeTwoToFourByteUtf8Character(b1, end);
1613: }
1614: }
1615: }
1616:
1617: protected final void decodeUtf8StringIntoCharBuffer(char[] ch,
1618: int offset) throws IOException {
1619: _charBufferLength = offset;
1620: final int end = _octetBufferLength + _octetBufferOffset;
1621: int b1;
1622: while (end != _octetBufferOffset) {
1623: b1 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1624: if (DecoderStateTables.UTF8[b1] == DecoderStateTables.UTF8_ONE_BYTE) {
1625: ch[_charBufferLength++] = (char) b1;
1626: } else {
1627: decodeTwoToFourByteUtf8Character(ch, b1, end);
1628: }
1629: }
1630: _charBufferLength -= offset;
1631: }
1632:
1633: private void decodeTwoToFourByteUtf8Character(int b1, int end)
1634: throws IOException {
1635: switch (DecoderStateTables.UTF8[b1]) {
1636: case DecoderStateTables.UTF8_TWO_BYTES: {
1637: // Decode byte 2
1638: if (end == _octetBufferOffset) {
1639: decodeUtf8StringLengthTooSmall();
1640: }
1641: final int b2 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1642: if ((b2 & 0xC0) != 0x80) {
1643: decodeUtf8StringIllegalState();
1644: }
1645:
1646: // Character guaranteed to be in [0x20, 0xD7FF] range
1647: // since a character encoded in two bytes will be in the
1648: // range [0x80, 0x1FFF]
1649: _charBuffer[_charBufferLength++] = (char) (((b1 & 0x1F) << 6) | (b2 & 0x3F));
1650: break;
1651: }
1652: case DecoderStateTables.UTF8_THREE_BYTES:
1653: final char c = decodeUtf8ThreeByteChar(end, b1);
1654: if (XMLChar.isContent(c)) {
1655: _charBuffer[_charBufferLength++] = c;
1656: } else {
1657: decodeUtf8StringIllegalState();
1658: }
1659: break;
1660: case DecoderStateTables.UTF8_FOUR_BYTES: {
1661: final int supplemental = decodeUtf8FourByteChar(end, b1);
1662: if (XMLChar.isContent(supplemental)) {
1663: _charBuffer[_charBufferLength++] = _utf8_highSurrogate;
1664: _charBuffer[_charBufferLength++] = _utf8_lowSurrogate;
1665: } else {
1666: decodeUtf8StringIllegalState();
1667: }
1668: break;
1669: }
1670: default:
1671: decodeUtf8StringIllegalState();
1672: }
1673: }
1674:
1675: private void decodeTwoToFourByteUtf8Character(char ch[], int b1,
1676: int end) throws IOException {
1677: switch (DecoderStateTables.UTF8[b1]) {
1678: case DecoderStateTables.UTF8_TWO_BYTES: {
1679: // Decode byte 2
1680: if (end == _octetBufferOffset) {
1681: decodeUtf8StringLengthTooSmall();
1682: }
1683: final int b2 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1684: if ((b2 & 0xC0) != 0x80) {
1685: decodeUtf8StringIllegalState();
1686: }
1687:
1688: // Character guaranteed to be in [0x20, 0xD7FF] range
1689: // since a character encoded in two bytes will be in the
1690: // range [0x80, 0x1FFF]
1691: ch[_charBufferLength++] = (char) (((b1 & 0x1F) << 6) | (b2 & 0x3F));
1692: break;
1693: }
1694: case DecoderStateTables.UTF8_THREE_BYTES:
1695: final char c = decodeUtf8ThreeByteChar(end, b1);
1696: if (XMLChar.isContent(c)) {
1697: ch[_charBufferLength++] = c;
1698: } else {
1699: decodeUtf8StringIllegalState();
1700: }
1701: break;
1702: case DecoderStateTables.UTF8_FOUR_BYTES: {
1703: final int supplemental = decodeUtf8FourByteChar(end, b1);
1704: if (XMLChar.isContent(supplemental)) {
1705: ch[_charBufferLength++] = _utf8_highSurrogate;
1706: ch[_charBufferLength++] = _utf8_lowSurrogate;
1707: } else {
1708: decodeUtf8StringIllegalState();
1709: }
1710: break;
1711: }
1712: default:
1713: decodeUtf8StringIllegalState();
1714: }
1715: }
1716:
1717: protected final void decodeUtf8NCNameIntoCharBuffer()
1718: throws IOException {
1719: _charBufferLength = 0;
1720: if (_charBuffer.length < _octetBufferLength) {
1721: _charBuffer = new char[_octetBufferLength];
1722: }
1723:
1724: final int end = _octetBufferLength + _octetBufferOffset;
1725:
1726: int b1 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1727: if (DecoderStateTables.UTF8_NCNAME[b1] == DecoderStateTables.UTF8_NCNAME_NCNAME) {
1728: _charBuffer[_charBufferLength++] = (char) b1;
1729: } else {
1730: decodeUtf8NCNameStartTwoToFourByteCharacters(b1, end);
1731: }
1732:
1733: while (end != _octetBufferOffset) {
1734: b1 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1735: if (DecoderStateTables.UTF8_NCNAME[b1] < DecoderStateTables.UTF8_TWO_BYTES) {
1736: _charBuffer[_charBufferLength++] = (char) b1;
1737: } else {
1738: decodeUtf8NCNameTwoToFourByteCharacters(b1, end);
1739: }
1740: }
1741: }
1742:
1743: private void decodeUtf8NCNameStartTwoToFourByteCharacters(int b1,
1744: int end) throws IOException {
1745: switch (DecoderStateTables.UTF8_NCNAME[b1]) {
1746: case DecoderStateTables.UTF8_TWO_BYTES: {
1747: // Decode byte 2
1748: if (end == _octetBufferOffset) {
1749: decodeUtf8StringLengthTooSmall();
1750: }
1751: final int b2 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1752: if ((b2 & 0xC0) != 0x80) {
1753: decodeUtf8StringIllegalState();
1754: }
1755:
1756: final char c = (char) (((b1 & 0x1F) << 6) | (b2 & 0x3F));
1757: if (XMLChar.isNCNameStart(c)) {
1758: _charBuffer[_charBufferLength++] = c;
1759: } else {
1760: decodeUtf8NCNameIllegalState();
1761: }
1762: break;
1763: }
1764: case DecoderStateTables.UTF8_THREE_BYTES:
1765: final char c = decodeUtf8ThreeByteChar(end, b1);
1766: if (XMLChar.isNCNameStart(c)) {
1767: _charBuffer[_charBufferLength++] = c;
1768: } else {
1769: decodeUtf8NCNameIllegalState();
1770: }
1771: break;
1772: case DecoderStateTables.UTF8_FOUR_BYTES: {
1773: final int supplemental = decodeUtf8FourByteChar(end, b1);
1774: if (XMLChar.isNCNameStart(supplemental)) {
1775: _charBuffer[_charBufferLength++] = _utf8_highSurrogate;
1776: _charBuffer[_charBufferLength++] = _utf8_lowSurrogate;
1777: } else {
1778: decodeUtf8NCNameIllegalState();
1779: }
1780: break;
1781: }
1782: case DecoderStateTables.UTF8_NCNAME_NCNAME_CHAR:
1783: default:
1784: decodeUtf8NCNameIllegalState();
1785: }
1786:
1787: }
1788:
1789: private void decodeUtf8NCNameTwoToFourByteCharacters(int b1, int end)
1790: throws IOException {
1791: switch (DecoderStateTables.UTF8_NCNAME[b1]) {
1792: case DecoderStateTables.UTF8_TWO_BYTES: {
1793: // Decode byte 2
1794: if (end == _octetBufferOffset) {
1795: decodeUtf8StringLengthTooSmall();
1796: }
1797: final int b2 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1798: if ((b2 & 0xC0) != 0x80) {
1799: decodeUtf8StringIllegalState();
1800: }
1801:
1802: final char c = (char) (((b1 & 0x1F) << 6) | (b2 & 0x3F));
1803: if (XMLChar.isNCName(c)) {
1804: _charBuffer[_charBufferLength++] = c;
1805: } else {
1806: decodeUtf8NCNameIllegalState();
1807: }
1808: break;
1809: }
1810: case DecoderStateTables.UTF8_THREE_BYTES:
1811: final char c = decodeUtf8ThreeByteChar(end, b1);
1812: if (XMLChar.isNCName(c)) {
1813: _charBuffer[_charBufferLength++] = c;
1814: } else {
1815: decodeUtf8NCNameIllegalState();
1816: }
1817: break;
1818: case DecoderStateTables.UTF8_FOUR_BYTES: {
1819: final int supplemental = decodeUtf8FourByteChar(end, b1);
1820: if (XMLChar.isNCName(supplemental)) {
1821: _charBuffer[_charBufferLength++] = _utf8_highSurrogate;
1822: _charBuffer[_charBufferLength++] = _utf8_lowSurrogate;
1823: } else {
1824: decodeUtf8NCNameIllegalState();
1825: }
1826: break;
1827: }
1828: default:
1829: decodeUtf8NCNameIllegalState();
1830: }
1831: }
1832:
1833: private char decodeUtf8ThreeByteChar(int end, int b1)
1834: throws IOException {
1835: // Decode byte 2
1836: if (end == _octetBufferOffset) {
1837: decodeUtf8StringLengthTooSmall();
1838: }
1839: final int b2 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1840: if ((b2 & 0xC0) != 0x80 || (b1 == 0xED && b2 >= 0xA0)
1841: || ((b1 & 0x0F) == 0 && (b2 & 0x20) == 0)) {
1842: decodeUtf8StringIllegalState();
1843: }
1844:
1845: // Decode byte 3
1846: if (end == _octetBufferOffset) {
1847: decodeUtf8StringLengthTooSmall();
1848: }
1849: final int b3 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1850: if ((b3 & 0xC0) != 0x80) {
1851: decodeUtf8StringIllegalState();
1852: }
1853:
1854: return (char) ((b1 & 0x0F) << 12 | (b2 & 0x3F) << 6 | (b3 & 0x3F));
1855: }
1856:
1857: private char _utf8_highSurrogate;
1858: private char _utf8_lowSurrogate;
1859:
1860: private int decodeUtf8FourByteChar(int end, int b1)
1861: throws IOException {
1862: // Decode byte 2
1863: if (end == _octetBufferOffset) {
1864: decodeUtf8StringLengthTooSmall();
1865: }
1866: final int b2 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1867: if ((b2 & 0xC0) != 0x80
1868: || ((b2 & 0x30) == 0 && (b1 & 0x07) == 0)) {
1869: decodeUtf8StringIllegalState();
1870: }
1871:
1872: // Decode byte 3
1873: if (end == _octetBufferOffset) {
1874: decodeUtf8StringLengthTooSmall();
1875: }
1876: final int b3 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1877: if ((b3 & 0xC0) != 0x80) {
1878: decodeUtf8StringIllegalState();
1879: }
1880:
1881: // Decode byte 4
1882: if (end == _octetBufferOffset) {
1883: decodeUtf8StringLengthTooSmall();
1884: }
1885: final int b4 = _octetBuffer[_octetBufferOffset++] & 0xFF;
1886: if ((b4 & 0xC0) != 0x80) {
1887: decodeUtf8StringIllegalState();
1888: }
1889:
1890: final int uuuuu = ((b1 << 2) & 0x001C) | ((b2 >> 4) & 0x0003);
1891: if (uuuuu > 0x10) {
1892: decodeUtf8StringIllegalState();
1893: }
1894: final int wwww = uuuuu - 1;
1895:
1896: _utf8_highSurrogate = (char) (0xD800 | ((wwww << 6) & 0x03C0)
1897: | ((b2 << 2) & 0x003C) | ((b3 >> 4) & 0x0003));
1898: _utf8_lowSurrogate = (char) (0xDC00 | ((b3 << 6) & 0x03C0) | (b4 & 0x003F));
1899:
1900: return XMLChar.supplemental(_utf8_highSurrogate,
1901: _utf8_lowSurrogate);
1902: }
1903:
1904: private void decodeUtf8StringLengthTooSmall() throws IOException {
1905: throw new IOException(CommonResourceBundle.getInstance()
1906: .getString("message.deliminatorTooSmall"));
1907: }
1908:
1909: private void decodeUtf8StringIllegalState() throws IOException {
1910: throw new IOException(CommonResourceBundle.getInstance()
1911: .getString("message.UTF8Encoded"));
1912: }
1913:
1914: private void decodeUtf8NCNameIllegalState() throws IOException {
1915: throw new IOException(CommonResourceBundle.getInstance()
1916: .getString("message.UTF8EncodedNCName"));
1917: }
1918:
1919: private void decodeUtf16StringIntoCharBuffer() throws IOException {
1920: _charBufferLength = _octetBufferLength / 2;
1921: if (_charBuffer.length < _charBufferLength) {
1922: _charBuffer = new char[_charBufferLength];
1923: }
1924:
1925: for (int i = 0; i < _charBufferLength; i++) {
1926: final char c = (char) ((read() << 8) | read());
1927: // TODO check c is a valid Char character
1928: _charBuffer[i] = c;
1929: }
1930:
1931: }
1932:
1933: protected String createQualifiedNameString(char[] first,
1934: String second) {
1935: final int l1 = first.length;
1936: final int l2 = second.length();
1937: final int total = l1 + l2 + 1;
1938: if (total < _charBuffer.length) {
1939: System.arraycopy(first, 0, _charBuffer, 0, l1);
1940: _charBuffer[l1] = ':';
1941: second.getChars(0, l2, _charBuffer, l1 + 1);
1942: return new String(_charBuffer, 0, total);
1943: } else {
1944: StringBuffer b = new StringBuffer(new String(first));
1945: b.append(':');
1946: b.append(second);
1947: return b.toString();
1948: }
1949: }
1950:
1951: protected final int read() throws IOException {
1952: if (_octetBufferOffset < _octetBufferEnd) {
1953: return _octetBuffer[_octetBufferOffset++] & 0xFF;
1954: } else {
1955: _octetBufferEnd = _s.read(_octetBuffer);
1956: if (_octetBufferEnd < 0) {
1957: throw new EOFException(CommonResourceBundle
1958: .getInstance().getString("message.EOF"));
1959: }
1960:
1961: _octetBufferOffset = 1;
1962: return _octetBuffer[0] & 0xFF;
1963: }
1964: }
1965:
1966: protected final void closeIfRequired() throws IOException {
1967: if (_s != null && _needForceStreamClose) {
1968: _s.close();
1969: }
1970: }
1971:
1972: protected final int peek() throws IOException {
1973: return peek(null);
1974: }
1975:
1976: protected final int peek(OctetBufferListener octetBufferListener)
1977: throws IOException {
1978: if (_octetBufferOffset < _octetBufferEnd) {
1979: return _octetBuffer[_octetBufferOffset] & 0xFF;
1980: } else {
1981: if (octetBufferListener != null) {
1982: octetBufferListener.onBeforeOctetBufferOverwrite();
1983: }
1984:
1985: _octetBufferEnd = _s.read(_octetBuffer);
1986: if (_octetBufferEnd < 0) {
1987: throw new EOFException(CommonResourceBundle
1988: .getInstance().getString("message.EOF"));
1989: }
1990:
1991: _octetBufferOffset = 0;
1992: return _octetBuffer[0] & 0xFF;
1993: }
1994: }
1995:
1996: protected final int peek2(OctetBufferListener octetBufferListener)
1997: throws IOException {
1998: if (_octetBufferOffset + 1 < _octetBufferEnd) {
1999: return _octetBuffer[_octetBufferOffset + 1] & 0xFF;
2000: } else {
2001: if (octetBufferListener != null) {
2002: octetBufferListener.onBeforeOctetBufferOverwrite();
2003: }
2004:
2005: int offset = 0;
2006: if (_octetBufferOffset < _octetBufferEnd) {
2007: _octetBuffer[0] = _octetBuffer[_octetBufferOffset];
2008: offset = 1;
2009: }
2010: _octetBufferEnd = _s.read(_octetBuffer, offset,
2011: _octetBuffer.length - offset);
2012:
2013: if (_octetBufferEnd < 0) {
2014: throw new EOFException(CommonResourceBundle
2015: .getInstance().getString("message.EOF"));
2016: }
2017:
2018: _octetBufferOffset = 0;
2019: return _octetBuffer[1] & 0xFF;
2020: }
2021: }
2022:
2023: protected class EncodingAlgorithmInputStream extends InputStream {
2024:
2025: public int read() throws IOException {
2026: if (_octetBufferStart < _octetBufferOffset) {
2027: return (_octetBuffer[_octetBufferStart++] & 0xFF);
2028: } else {
2029: return -1;
2030: }
2031: }
2032:
2033: public int read(byte b[]) throws IOException {
2034: return read(b, 0, b.length);
2035: }
2036:
2037: public int read(byte b[], int off, int len) throws IOException {
2038: if (b == null) {
2039: throw new NullPointerException();
2040: } else if ((off < 0) || (off > b.length) || (len < 0)
2041: || ((off + len) > b.length) || ((off + len) < 0)) {
2042: throw new IndexOutOfBoundsException();
2043: } else if (len == 0) {
2044: return 0;
2045: }
2046:
2047: final int newOctetBufferStart = _octetBufferStart + len;
2048: if (newOctetBufferStart < _octetBufferOffset) {
2049: System.arraycopy(_octetBuffer, _octetBufferStart, b,
2050: off, len);
2051: _octetBufferStart = newOctetBufferStart;
2052: return len;
2053: } else if (_octetBufferStart < _octetBufferOffset) {
2054: final int bytesToRead = _octetBufferOffset
2055: - _octetBufferStart;
2056: System.arraycopy(_octetBuffer, _octetBufferStart, b,
2057: off, bytesToRead);
2058: _octetBufferStart += bytesToRead;
2059: return bytesToRead;
2060: } else {
2061: return -1;
2062: }
2063: }
2064: }
2065:
2066: protected final boolean _isFastInfosetDocument() throws IOException {
2067: // Fill up the octet buffer
2068: peek();
2069:
2070: _octetBufferLength = EncodingConstants.BINARY_HEADER.length;
2071: ensureOctetBufferSize();
2072: _octetBufferOffset += _octetBufferLength;
2073:
2074: // Check for binary header
2075: if (_octetBuffer[0] != EncodingConstants.BINARY_HEADER[0]
2076: || _octetBuffer[1] != EncodingConstants.BINARY_HEADER[1]
2077: || _octetBuffer[2] != EncodingConstants.BINARY_HEADER[2]
2078: || _octetBuffer[3] != EncodingConstants.BINARY_HEADER[3]) {
2079:
2080: // Check for each form of XML declaration
2081: for (int i = 0; i < EncodingConstants.XML_DECLARATION_VALUES.length; i++) {
2082: _octetBufferLength = EncodingConstants.XML_DECLARATION_VALUES[i].length
2083: - _octetBufferOffset;
2084: ensureOctetBufferSize();
2085: _octetBufferOffset += _octetBufferLength;
2086:
2087: // Check XML declaration
2088: if (arrayEquals(
2089: _octetBuffer,
2090: 0,
2091: EncodingConstants.XML_DECLARATION_VALUES[i],
2092: EncodingConstants.XML_DECLARATION_VALUES[i].length)) {
2093: _octetBufferLength = EncodingConstants.BINARY_HEADER.length;
2094: ensureOctetBufferSize();
2095:
2096: // Check for binary header
2097: if (_octetBuffer[_octetBufferOffset++] != EncodingConstants.BINARY_HEADER[0]
2098: || _octetBuffer[_octetBufferOffset++] != EncodingConstants.BINARY_HEADER[1]
2099: || _octetBuffer[_octetBufferOffset++] != EncodingConstants.BINARY_HEADER[2]
2100: || _octetBuffer[_octetBufferOffset++] != EncodingConstants.BINARY_HEADER[3]) {
2101: return false;
2102: } else {
2103: // Fast Infoset document with XML declaration and binary header
2104: return true;
2105: }
2106: }
2107: }
2108:
2109: return false;
2110: }
2111:
2112: // Fast Infoset document with binary header
2113: return true;
2114: }
2115:
2116: private boolean arrayEquals(byte[] b1, int offset, byte[] b2,
2117: int length) {
2118: for (int i = 0; i < length; i++) {
2119: if (b1[offset + i] != b2[i]) {
2120: return false;
2121: }
2122: }
2123:
2124: return true;
2125: }
2126:
2127: static public boolean isFastInfosetDocument(InputStream s)
2128: throws IOException {
2129: // TODO
2130: // Check for <?xml declaration with 'finf' encoding
2131:
2132: final byte[] header = new byte[4];
2133: s.read(header);
2134: if (header[0] != EncodingConstants.BINARY_HEADER[0]
2135: || header[1] != EncodingConstants.BINARY_HEADER[1]
2136: || header[2] != EncodingConstants.BINARY_HEADER[2]
2137: || header[3] != EncodingConstants.BINARY_HEADER[3]) {
2138: return false;
2139: }
2140:
2141: // TODO
2142: return true;
2143: }
2144: }
|