0001: /*
0002: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
0003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0004: *
0005: * This code is free software; you can redistribute it and/or modify it
0006: * under the terms of the GNU General Public License version 2 only, as
0007: * published by the Free Software Foundation. Sun designates this
0008: * particular file as subject to the "Classpath" exception as provided
0009: * by Sun in the LICENSE file that accompanied this code.
0010: *
0011: * This code is distributed in the hope that it will be useful, but WITHOUT
0012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0014: * version 2 for more details (a copy is included in the LICENSE file that
0015: * accompanied this code).
0016: *
0017: * You should have received a copy of the GNU General Public License version
0018: * 2 along with this work; if not, write to the Free Software Foundation,
0019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0020: *
0021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0022: * CA 95054 USA or visit www.sun.com if you need additional information or
0023: * have any questions.
0024: *
0025: * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC.
0026: */
0027:
0028: /*
0029: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
0030: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0031: *
0032: * This code is free software; you can redistribute it and/or modify it
0033: * under the terms of the GNU General Public License version 2 only, as
0034: * published by the Free Software Foundation. Sun designates this
0035: * particular file as subject to the "Classpath" exception as provided
0036: * by Sun in the LICENSE file that accompanied this code.
0037: *
0038: * This code is distributed in the hope that it will be useful, but WITHOUT
0039: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0040: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0041: * version 2 for more details (a copy is included in the LICENSE file that
0042: * accompanied this code).
0043: *
0044: * You should have received a copy of the GNU General Public License version
0045: * 2 along with this work; if not, write to the Free Software Foundation,
0046: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0047: *
0048: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0049: * CA 95054 USA or visit www.sun.com if you need additional information or
0050: * have any questions.
0051: *
0052: * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC.
0053: *
0054: */
0055: package com.sun.xml.internal.fastinfoset;
0056:
0057: import com.sun.xml.internal.fastinfoset.algorithm.BuiltInEncodingAlgorithm;
0058: import com.sun.xml.internal.fastinfoset.algorithm.BuiltInEncodingAlgorithmFactory;
0059: import com.sun.xml.internal.fastinfoset.org.apache.xerces.util.XMLChar;
0060: import com.sun.xml.internal.fastinfoset.util.CharArrayIntMap;
0061: import com.sun.xml.internal.fastinfoset.util.KeyIntMap;
0062: import com.sun.xml.internal.fastinfoset.util.LocalNameQualifiedNamesMap;
0063: import com.sun.xml.internal.fastinfoset.util.StringIntMap;
0064: import com.sun.xml.internal.fastinfoset.vocab.SerializerVocabulary;
0065: import java.io.IOException;
0066: import java.io.OutputStream;
0067: import java.util.HashMap;
0068: import java.util.Map;
0069: import com.sun.xml.internal.org.jvnet.fastinfoset.EncodingAlgorithm;
0070: import com.sun.xml.internal.org.jvnet.fastinfoset.EncodingAlgorithmException;
0071: import com.sun.xml.internal.org.jvnet.fastinfoset.EncodingAlgorithmIndexes;
0072: import com.sun.xml.internal.org.jvnet.fastinfoset.ExternalVocabulary;
0073: import com.sun.xml.internal.org.jvnet.fastinfoset.FastInfosetException;
0074: import com.sun.xml.internal.org.jvnet.fastinfoset.FastInfosetSerializer;
0075: import org.xml.sax.helpers.DefaultHandler;
0076:
0077: /**
0078: * Abstract encoder for developing concrete encoders.
0079: *
0080: * Concrete implementations extending Encoder will utilize methods on Encoder
0081: * to encode XML infoset according to the Fast Infoset standard. It is the
0082: * responsibility of the concrete implementation to ensure that methods are
0083: * invoked in the correct order to produce a valid fast infoset document.
0084: *
0085: * <p>
0086: * This class extends org.xml.sax.helpers.DefaultHandler so that concrete SAX
0087: * implementations can be used with javax.xml.parsers.SAXParser and the parse
0088: * methods that take org.xml.sax.helpers.DefaultHandler as a parameter.
0089: *
0090: * <p>
0091: * Buffering of octets that are written to an {@link java.io.OutputStream} is
0092: * supported in a similar manner to a {@link java.io.BufferedOutputStream}.
0093: * Combining buffering with encoding enables better performance.
0094: *
0095: * <p>
0096: * More than one fast infoset document may be encoded to the
0097: * {@link java.io.OutputStream}.
0098: *
0099: */
0100: public abstract class Encoder extends DefaultHandler implements
0101: FastInfosetSerializer {
0102:
0103: /**
0104: * Character encoding scheme system property for the encoding
0105: * of content and attribute values.
0106: */
0107: public static final String CHARACTER_ENCODING_SCHEME_SYSTEM_PROPERTY = "com.sun.xml.internal.fastinfoset.serializer.character-encoding-scheme";
0108:
0109: /**
0110: * Default character encoding scheme system property for the encoding
0111: * of content and attribute values.
0112: */
0113: protected static String _characterEncodingSchemeSystemDefault = UTF_8;
0114:
0115: static {
0116: // Initiate the default character encoding schema from the system
0117: // property
0118: String p = System.getProperty(
0119: CHARACTER_ENCODING_SCHEME_SYSTEM_PROPERTY,
0120: _characterEncodingSchemeSystemDefault);
0121: if (p.equals(UTF_16BE)) {
0122: _characterEncodingSchemeSystemDefault = UTF_16BE;
0123: }
0124: }
0125:
0126: /**
0127: * True if comments shall be ignored.
0128: */
0129: private boolean _ignoreComments;
0130:
0131: /**
0132: * True if processing instructions shall be ignored.
0133: */
0134: private boolean _ignoreProcessingInstructions;
0135:
0136: /**
0137: * True if white space characters for text content shall be ignored.
0138: */
0139: private boolean _ignoreWhiteSpaceTextContent;
0140:
0141: /**
0142: * True, if the local name string is used as the key to find the
0143: * associated set of qualified names.
0144: * <p>
0145: * False, if the <prefix>:<local name> string is used as the key
0146: * to find the associated set of qualified names.
0147: */
0148: private boolean _useLocalNameAsKeyForQualifiedNameLookup;
0149:
0150: /**
0151: * True if strings for text content and attribute values will be
0152: * UTF-8 encoded otherwise they will be UTF-16 encoded.
0153: */
0154: private boolean _encodingStringsAsUtf8 = true;
0155:
0156: /**
0157: * Encoding constant generated from the string encoding.
0158: */
0159: private int _nonIdentifyingStringOnThirdBitCES;
0160:
0161: /**
0162: * Encoding constant generated from the string encoding.
0163: */
0164: private int _nonIdentifyingStringOnFirstBitCES;
0165:
0166: /**
0167: * The map of URIs to algorithms.
0168: */
0169: private Map _registeredEncodingAlgorithms = new HashMap();
0170:
0171: /**
0172: * The vocabulary that is used by the encoder
0173: */
0174: protected SerializerVocabulary _v;
0175:
0176: /**
0177: * True if the vocabulary is internal to the encoder
0178: */
0179: private boolean _vIsInternal;
0180:
0181: /**
0182: * True if termination of an information item is required
0183: */
0184: protected boolean _terminate = false;
0185:
0186: /**
0187: * The current octet that is to be written.
0188: */
0189: protected int _b;
0190:
0191: /**
0192: * The {@link java.io.OutputStream} that the encoded XML infoset (the
0193: * fast infoset document) is written to.
0194: */
0195: protected OutputStream _s;
0196:
0197: /**
0198: * The internal buffer of characters used for the UTF-8 or UTF-16 encoding
0199: * of characters.
0200: */
0201: protected char[] _charBuffer = new char[512];
0202:
0203: /**
0204: * The internal buffer of bytes.
0205: */
0206: protected byte[] _octetBuffer = new byte[1024];
0207:
0208: /**
0209: * The current position in the internal buffer.
0210: */
0211: protected int _octetBufferIndex;
0212:
0213: /**
0214: * The current mark in the internal buffer.
0215: *
0216: * <p>
0217: * If the value of the mark is < 0 then the mark is not set.
0218: */
0219: protected int _markIndex = -1;
0220:
0221: /**
0222: * The limit on the size of [normalized value] of Attribute Information
0223: * Items that will be indexed.
0224: */
0225: protected int attributeValueSizeConstraint = FastInfosetSerializer.ATTRIBUTE_VALUE_SIZE_CONSTRAINT;
0226:
0227: /**
0228: * The limit on the size of character content chunks
0229: * of Character Information Items or Comment Information Items that
0230: * will be indexed.
0231: */
0232: protected int characterContentChunkSizeContraint = FastInfosetSerializer.CHARACTER_CONTENT_CHUNK_SIZE_CONSTRAINT;
0233:
/**
 * Creates an encoder whose character encoding scheme is initialised from
 * the class-wide system default.
 */
protected Encoder() {
    this.setCharacterEncodingScheme(_characterEncodingSchemeSystemDefault);
}
0240:
protected Encoder(boolean useLocalNameAsKeyForQualifiedNameLookup) {
    // Same initialisation as the no-argument constructor ...
    this.setCharacterEncodingScheme(_characterEncodingSchemeSystemDefault);
    // ... plus the caller's choice of key for qualified name lookup.
    this._useLocalNameAsKeyForQualifiedNameLookup = useLocalNameAsKeyForQualifiedNameLookup;
}
0245:
0246: // FastInfosetSerializer interface
0247:
0248: /**
0249: * {@inheritDoc}
0250: */
0251: public final void setIgnoreComments(boolean ignoreComments) {
0252: _ignoreComments = ignoreComments;
0253: }
0254:
0255: /**
0256: * {@inheritDoc}
0257: */
0258: public final boolean getIgnoreComments() {
0259: return _ignoreComments;
0260: }
0261:
0262: /**
0263: * {@inheritDoc}
0264: */
0265: public final void setIgnoreProcesingInstructions(
0266: boolean ignoreProcesingInstructions) {
0267: _ignoreProcessingInstructions = ignoreProcesingInstructions;
0268: }
0269:
0270: /**
0271: * {@inheritDoc}
0272: */
0273: public final boolean getIgnoreProcesingInstructions() {
0274: return _ignoreProcessingInstructions;
0275: }
0276:
0277: /**
0278: * {@inheritDoc}
0279: */
0280: public final void setIgnoreWhiteSpaceTextContent(
0281: boolean ignoreWhiteSpaceTextContent) {
0282: _ignoreWhiteSpaceTextContent = ignoreWhiteSpaceTextContent;
0283: }
0284:
0285: /**
0286: * {@inheritDoc}
0287: */
0288: public final boolean getIgnoreWhiteSpaceTextContent() {
0289: return _ignoreWhiteSpaceTextContent;
0290: }
0291:
0292: /**
0293: * {@inheritDoc}
0294: */
0295: public void setCharacterEncodingScheme(
0296: String characterEncodingScheme) {
0297: if (characterEncodingScheme.equals(UTF_16BE)) {
0298: _encodingStringsAsUtf8 = false;
0299: _nonIdentifyingStringOnThirdBitCES = EncodingConstants.CHARACTER_CHUNK
0300: | EncodingConstants.CHARACTER_CHUNK_UTF_16_FLAG;
0301: _nonIdentifyingStringOnFirstBitCES = EncodingConstants.NISTRING_UTF_16_FLAG;
0302: } else {
0303: _encodingStringsAsUtf8 = true;
0304: _nonIdentifyingStringOnThirdBitCES = EncodingConstants.CHARACTER_CHUNK;
0305: _nonIdentifyingStringOnFirstBitCES = 0;
0306: }
0307: }
0308:
0309: /**
0310: * {@inheritDoc}
0311: */
0312: public String getCharacterEncodingScheme() {
0313: return (_encodingStringsAsUtf8) ? UTF_8 : UTF_16BE;
0314: }
0315:
0316: /**
0317: * {@inheritDoc}
0318: */
0319: public void setRegisteredEncodingAlgorithms(Map algorithms) {
0320: _registeredEncodingAlgorithms = algorithms;
0321: if (_registeredEncodingAlgorithms == null) {
0322: _registeredEncodingAlgorithms = new HashMap();
0323: }
0324: }
0325:
0326: /**
0327: * {@inheritDoc}
0328: */
0329: public Map getRegisteredEncodingAlgorithms() {
0330: return _registeredEncodingAlgorithms;
0331: }
0332:
0333: /**
0334: * {@inheritDoc}
0335: */
0336: public void setCharacterContentChunkSizeLimit(int size) {
0337: if (size < 0) {
0338: size = 0;
0339: }
0340:
0341: characterContentChunkSizeContraint = size;
0342: }
0343:
0344: /**
0345: * {@inheritDoc}
0346: */
0347: public int getCharacterContentChunkSizeLimit() {
0348: return characterContentChunkSizeContraint;
0349: }
0350:
0351: /**
0352: * {@inheritDoc}
0353: */
0354: public void setAttributeValueSizeLimit(int size) {
0355: if (size < 0) {
0356: size = 0;
0357: }
0358:
0359: attributeValueSizeConstraint = size;
0360: }
0361:
0362: /**
0363: * {@inheritDoc}
0364: */
0365: public int getAttributeValueSizeLimit() {
0366: return attributeValueSizeConstraint;
0367: }
0368:
0369: /**
0370: * {@inheritDoc}
0371: */
0372: public void setExternalVocabulary(ExternalVocabulary v) {
0373: // Create internal serializer vocabulary
0374: _v = new SerializerVocabulary();
0375: // Set the external vocabulary
0376: SerializerVocabulary ev = new SerializerVocabulary(
0377: v.vocabulary, _useLocalNameAsKeyForQualifiedNameLookup);
0378: _v.setExternalVocabulary(v.URI, ev, false);
0379:
0380: _vIsInternal = true;
0381: }
0382:
0383: // End of FastInfosetSerializer interface
0384:
0385: /**
0386: * Reset the encoder for reuse encoding another XML infoset.
0387: */
0388: public void reset() {
0389: _terminate = false;
0390: }
0391:
0392: /**
0393: * Set the OutputStream to encode the XML infoset to a
0394: * fast infoset document.
0395: *
0396: * @param s the OutputStream where the fast infoset document is written to.
0397: */
0398: public void setOutputStream(OutputStream s) {
0399: _octetBufferIndex = 0;
0400: _markIndex = -1;
0401: _s = s;
0402: }
0403:
0404: /**
0405: * Set the SerializerVocabulary to be used for encoding.
0406: *
0407: * @param vocabulary the vocabulary to be used for encoding.
0408: */
0409: public void setVocabulary(SerializerVocabulary vocabulary) {
0410: _v = vocabulary;
0411: _vIsInternal = false;
0412: }
0413:
0414: /**
0415: * Encode the header of a fast infoset document.
0416: *
0417: * @param encodeXmlDecl true if the XML declaration should be encoded.
0418: */
0419: protected final void encodeHeader(boolean encodeXmlDecl)
0420: throws IOException {
0421: if (encodeXmlDecl) {
0422: _s.write(EncodingConstants.XML_DECLARATION_VALUES[0]);
0423: }
0424: _s.write(EncodingConstants.BINARY_HEADER);
0425: }
0426:
0427: /**
0428: * Encode the initial vocabulary of a fast infoset document.
0429: *
0430: */
0431: protected final void encodeInitialVocabulary() throws IOException {
0432: if (_v == null) {
0433: _v = new SerializerVocabulary();
0434: _vIsInternal = true;
0435: } else if (_vIsInternal) {
0436: _v.clear();
0437: }
0438:
0439: if (_v.hasInitialVocabulary()) {
0440: _b = EncodingConstants.DOCUMENT_INITIAL_VOCABULARY_FLAG;
0441: write(_b);
0442:
0443: SerializerVocabulary initialVocabulary = _v
0444: .getReadOnlyVocabulary();
0445:
0446: // TODO check for contents of vocabulary to assign bits
0447: if (initialVocabulary.hasExternalVocabulary()) {
0448: _b = EncodingConstants.INITIAL_VOCABULARY_EXTERNAL_VOCABULARY_FLAG;
0449: write(_b);
0450: write(0);
0451: }
0452:
0453: if (initialVocabulary.hasExternalVocabulary()) {
0454: encodeNonEmptyOctetStringOnSecondBit(_v
0455: .getExternalVocabularyURI().toString());
0456: }
0457:
0458: // TODO check for contents of vocabulary to encode values
0459: } else if (_v.hasExternalVocabulary()) {
0460: _b = EncodingConstants.DOCUMENT_INITIAL_VOCABULARY_FLAG;
0461: write(_b);
0462:
0463: _b = EncodingConstants.INITIAL_VOCABULARY_EXTERNAL_VOCABULARY_FLAG;
0464: write(_b);
0465: write(0);
0466:
0467: encodeNonEmptyOctetStringOnSecondBit(_v
0468: .getExternalVocabularyURI().toString());
0469: } else {
0470: write(0);
0471: }
0472: }
0473:
0474: /**
0475: * Encode the termination of the Document Information Item.
0476: *
0477: */
0478: protected final void encodeDocumentTermination() throws IOException {
0479: encodeElementTermination();
0480: encodeTermination();
0481: _flush();
0482: _s.flush();
0483: }
0484:
0485: /**
0486: * Encode the termination of an Element Information Item.
0487: *
0488: */
0489: protected final void encodeElementTermination() throws IOException {
0490: _terminate = true;
0491: switch (_b) {
0492: case EncodingConstants.TERMINATOR:
0493: _b = EncodingConstants.DOUBLE_TERMINATOR;
0494: break;
0495: case EncodingConstants.DOUBLE_TERMINATOR:
0496: write(EncodingConstants.DOUBLE_TERMINATOR);
0497: default:
0498: _b = EncodingConstants.TERMINATOR;
0499: }
0500: }
0501:
0502: /**
0503: * Encode a termination if required.
0504: *
0505: */
0506: protected final void encodeTermination() throws IOException {
0507: if (_terminate) {
0508: write(_b);
0509: _terminate = false;
0510: }
0511: }
0512:
0513: /**
0514: * Encode a Attribute Information Item that is a namespace declaration.
0515: *
0516: * @param prefix the prefix of the namespace declaration,
0517: * if "" then there is no prefix for the namespace declaration.
0518: * @param uri the URI of the namespace declaration,
0519: * if "" then there is no URI for the namespace declaration.
0520: */
0521: protected final void encodeNamespaceAttribute(String prefix,
0522: String uri) throws IOException {
0523: _b = EncodingConstants.NAMESPACE_ATTRIBUTE;
0524: if (prefix != "") {
0525: _b |= EncodingConstants.NAMESPACE_ATTRIBUTE_PREFIX_FLAG;
0526: }
0527: if (uri != "") {
0528: _b |= EncodingConstants.NAMESPACE_ATTRIBUTE_NAME_FLAG;
0529: }
0530:
0531: // NOTE a prefix with out a namespace name is an undeclaration
0532: // of the namespace bound to the prefix
0533: // TODO needs to investigate how the startPrefixMapping works in
0534: // relation to undeclaration
0535:
0536: write(_b);
0537:
0538: if (prefix != "") {
0539: encodeIdentifyingNonEmptyStringOnFirstBit(prefix, _v.prefix);
0540: }
0541: if (uri != "") {
0542: encodeIdentifyingNonEmptyStringOnFirstBit(uri,
0543: _v.namespaceName);
0544: }
0545: }
0546:
0547: /**
0548: * Encode a chunk of Character Information Items.
0549: *
0550: * @param ch the array of characters.
0551: * @param offset the offset into the array of characters.
0552: * @param length the length of characters.
0553: * @throws ArrayIndexOutOfBoundsException.
0554: */
0555: protected final void encodeCharacters(char[] ch, int offset,
0556: int length) throws IOException {
0557: final boolean addToTable = (length < characterContentChunkSizeContraint) ? true
0558: : false;
0559: encodeNonIdentifyingStringOnThirdBit(ch, offset, length,
0560: _v.characterContentChunk, addToTable, true);
0561: }
0562:
0563: /**
0564: * Encode a chunk of Character Information Items.
0565: *
0566: * If the array of characters is to be indexed (as determined by
0567: * {@link Encoder#characterContentChunkSizeContraint}) then the array is not cloned
0568: * when adding the array to the vocabulary.
0569: *
0570: * @param ch the array of characters.
0571: * @param offset the offset into the array of characters.
0572: * @param length the length of characters.
0573: * @throws ArrayIndexOutOfBoundsException.
0574: */
0575: protected final void encodeCharactersNoClone(char[] ch, int offset,
0576: int length) throws IOException {
0577: final boolean addToTable = (length < characterContentChunkSizeContraint) ? true
0578: : false;
0579: encodeNonIdentifyingStringOnThirdBit(ch, offset, length,
0580: _v.characterContentChunk, addToTable, false);
0581: }
0582:
0583: /**
0584: * Encode a chunk of Character Information Items using a restricted
0585: * alphabet that results in the encoding of a character in 4 bits
0586: * (or two characters per octet).
0587: *
0588: * @param id the restricted alphabet identifier.
0589: * @param table the table mapping characters to 4 bit values.
0590: * @param ch the array of characters.
0591: * @param offset the offset into the array of characters.
0592: * @param length the length of characters.
0593: * @throws ArrayIndexOutOfBoundsException.
0594: */
0595: protected final void encodeFourBitCharacters(int id, int[] table,
0596: char[] ch, int offset, int length)
0597: throws FastInfosetException, IOException {
0598: // This procedure assumes that id <= 64
0599: _b = (length < characterContentChunkSizeContraint) ? EncodingConstants.CHARACTER_CHUNK
0600: | EncodingConstants.CHARACTER_CHUNK_RESTRICTED_ALPHABET_FLAG
0601: | EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG
0602: : EncodingConstants.CHARACTER_CHUNK
0603: | EncodingConstants.CHARACTER_CHUNK_RESTRICTED_ALPHABET_FLAG;
0604: write(_b);
0605:
0606: // Encode bottom 6 bits of enoding algorithm id
0607: _b = id << 2;
0608:
0609: encodeNonEmptyFourBitCharacterStringOnSeventhBit(table, ch,
0610: offset, length);
0611: }
0612:
0613: /**
0614: * Encode a chunk of Character Information Items using a restricted
0615: * alphabet table.
0616: *
0617: * @param alphabet the alphabet defining the mapping between characters and
0618: * integer values.
0619: * @param ch the array of characters.
0620: * @param offset the offset into the array of characters.
0621: * @param length the length of characters.
0622: * @throws ArrayIndexOutOfBoundsException.
0623: * @throws FastInfosetException if the alphabet is not present in the
0624: * vocabulary.
0625: */
0626: protected final void encodeAlphabetCharacters(String alphabet,
0627: char[] ch, int offset, int length)
0628: throws FastInfosetException, IOException {
0629: int id = _v.restrictedAlphabet.get(alphabet);
0630: if (id == KeyIntMap.NOT_PRESENT) {
0631: throw new FastInfosetException(CommonResourceBundle
0632: .getInstance().getString(
0633: "message.restrictedAlphabetNotPresent"));
0634: }
0635: id += EncodingConstants.RESTRICTED_ALPHABET_APPLICATION_START;
0636:
0637: _b = (length < characterContentChunkSizeContraint) ? EncodingConstants.CHARACTER_CHUNK
0638: | EncodingConstants.CHARACTER_CHUNK_RESTRICTED_ALPHABET_FLAG
0639: | EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG
0640: : EncodingConstants.CHARACTER_CHUNK
0641: | EncodingConstants.CHARACTER_CHUNK_RESTRICTED_ALPHABET_FLAG;
0642: _b |= (id & 0xC0) >> 6;
0643: write(_b);
0644:
0645: // Encode bottom 6 bits of enoding algorithm id
0646: _b = (id & 0x3F) << 2;
0647:
0648: encodeNonEmptyNBitCharacterStringOnSeventhBit(alphabet, ch,
0649: offset, length);
0650: }
0651:
0652: /**
0653: * Encode a Processing Instruction Information Item.
0654: *
0655: * @param target the target of the processing instruction.
0656: * @param data the data of the processing instruction.
0657: */
0658: protected final void encodeProcessingInstruction(String target,
0659: String data) throws IOException {
0660: write(EncodingConstants.PROCESSING_INSTRUCTION);
0661:
0662: // Target
0663: encodeIdentifyingNonEmptyStringOnFirstBit(target,
0664: _v.otherNCName);
0665:
0666: // Data
0667: boolean addToTable = (data.length() < characterContentChunkSizeContraint) ? true
0668: : false;
0669: encodeNonIdentifyingStringOnFirstBit(data, _v.otherString,
0670: addToTable);
0671: }
0672:
0673: /**
0674: * Encode a Comment Information Item.
0675: *
0676: * @param ch the array of characters that is as comment.
0677: * @param offset the offset into the array of characters.
0678: * @param length the length of characters.
0679: * @throws ArrayIndexOutOfBoundsException.
0680: */
0681: protected final void encodeComment(char[] ch, int offset, int length)
0682: throws IOException {
0683: write(EncodingConstants.COMMENT);
0684:
0685: boolean addToTable = (length < characterContentChunkSizeContraint) ? true
0686: : false;
0687: encodeNonIdentifyingStringOnFirstBit(ch, offset, length,
0688: _v.otherString, addToTable, true);
0689: }
0690:
0691: /**
0692: * Encode a Comment Information Item.
0693: *
0694: * If the array of characters that is a comment is to be indexed (as
0695: * determined by {@link Encoder#characterContentChunkSizeContraint}) then
0696: * the array is not cloned when adding the array to the vocabulary.
0697: *
0698: * @param ch the array of characters.
0699: * @param offset the offset into the array of characters.
0700: * @param length the length of characters.
0701: * @throws ArrayIndexOutOfBoundsException.
0702: */
0703: protected final void encodeCommentNoClone(char[] ch, int offset,
0704: int length) throws IOException {
0705: write(EncodingConstants.COMMENT);
0706:
0707: boolean addToTable = (length < characterContentChunkSizeContraint) ? true
0708: : false;
0709: encodeNonIdentifyingStringOnFirstBit(ch, offset, length,
0710: _v.otherString, addToTable, false);
0711: }
0712:
0713: /**
0714: * Encode a qualified name of an Element Informaiton Item on the third bit
0715: * of an octet.
0716: * Implementation of clause C.18 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0717: *
0718: * <p>
0719: * The index of the qualified name will be encoded if the name is present
0720: * in the vocabulary otherwise the qualified name will be encoded literally
0721: * (see {@link #encodeLiteralElementQualifiedNameOnThirdBit}).
0722: *
0723: * @param namespaceURI the namespace URI of the qualified name.
0724: * @param prefix the prefix of the qualified name.
0725: * @param localName the local name of the qualified name.
0726: */
0727: protected final void encodeElementQualifiedNameOnThirdBit(
0728: String namespaceURI, String prefix, String localName)
0729: throws IOException {
0730: LocalNameQualifiedNamesMap.Entry entry = _v.elementName
0731: .obtainEntry(localName);
0732: if (entry._valueIndex > 0) {
0733: QualifiedName[] names = entry._value;
0734: for (int i = 0; i < entry._valueIndex; i++) {
0735: if ((prefix == names[i].prefix || prefix
0736: .equals(names[i].prefix))
0737: && (namespaceURI == names[i].namespaceName || namespaceURI
0738: .equals(names[i].namespaceName))) {
0739: encodeNonZeroIntegerOnThirdBit(names[i].index);
0740: return;
0741: }
0742: }
0743: }
0744:
0745: encodeLiteralElementQualifiedNameOnThirdBit(namespaceURI,
0746: prefix, localName, entry);
0747: }
0748:
0749: /**
0750: * Encode a literal qualified name of an Element Informaiton Item on the
0751: * third bit of an octet.
0752: * Implementation of clause C.18 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0753: *
0754: * @param namespaceURI the namespace URI of the qualified name.
0755: * @param prefix the prefix of the qualified name.
0756: * @param localName the local name of the qualified name.
0757: */
0758: protected final void encodeLiteralElementQualifiedNameOnThirdBit(
0759: String namespaceURI, String prefix, String localName,
0760: LocalNameQualifiedNamesMap.Entry entry) throws IOException {
0761: QualifiedName name = new QualifiedName(prefix, namespaceURI,
0762: localName, "", _v.elementName.getNextIndex());
0763: entry.addQualifiedName(name);
0764:
0765: int namespaceURIIndex = KeyIntMap.NOT_PRESENT;
0766: int prefixIndex = KeyIntMap.NOT_PRESENT;
0767: if (namespaceURI != "") {
0768: namespaceURIIndex = _v.namespaceName.get(namespaceURI);
0769: if (namespaceURIIndex == KeyIntMap.NOT_PRESENT) {
0770: throw new IOException(CommonResourceBundle
0771: .getInstance().getString(
0772: "message.namespaceURINotIndexed",
0773: new Object[] { namespaceURI }));
0774: }
0775:
0776: if (prefix != "") {
0777: prefixIndex = _v.prefix.get(prefix);
0778: if (prefixIndex == KeyIntMap.NOT_PRESENT) {
0779: throw new IOException(CommonResourceBundle
0780: .getInstance().getString(
0781: "message.prefixNotIndexed",
0782: new Object[] { prefix }));
0783: }
0784: }
0785: }
0786:
0787: int localNameIndex = _v.localName.obtainIndex(localName);
0788:
0789: _b |= EncodingConstants.ELEMENT_LITERAL_QNAME_FLAG;
0790: if (namespaceURIIndex >= 0) {
0791: _b |= EncodingConstants.LITERAL_QNAME_NAMESPACE_NAME_FLAG;
0792: if (prefixIndex >= 0) {
0793: _b |= EncodingConstants.LITERAL_QNAME_PREFIX_FLAG;
0794: }
0795: }
0796: write(_b);
0797:
0798: if (namespaceURIIndex >= 0) {
0799: if (prefixIndex >= 0) {
0800: encodeNonZeroIntegerOnSecondBitFirstBitOne(prefixIndex);
0801: }
0802: encodeNonZeroIntegerOnSecondBitFirstBitOne(namespaceURIIndex);
0803: }
0804:
0805: if (localNameIndex >= 0) {
0806: encodeNonZeroIntegerOnSecondBitFirstBitOne(localNameIndex);
0807: } else {
0808: encodeNonEmptyOctetStringOnSecondBit(localName);
0809: }
0810: }
0811:
0812: /**
0813: * Encode a qualified name of an Attribute Informaiton Item on the third bit
0814: * of an octet.
0815: * Implementation of clause C.17 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0816: *
0817: * <p>
0818: * The index of the qualified name will be encoded if the name is present
0819: * in the vocabulary otherwise the qualified name will be encoded literally
0820: * (see {@link #encodeLiteralAttributeQualifiedNameOnSecondBit}).
0821: *
0822: * @param namespaceURI the namespace URI of the qualified name.
0823: * @param prefix the prefix of the qualified name.
0824: * @param localName the local name of the qualified name.
0825: */
0826: protected final void encodeAttributeQualifiedNameOnSecondBit(
0827: String namespaceURI, String prefix, String localName)
0828: throws IOException {
0829: LocalNameQualifiedNamesMap.Entry entry = _v.attributeName
0830: .obtainEntry(localName);
0831: if (entry._valueIndex > 0) {
0832: QualifiedName[] names = entry._value;
0833: for (int i = 0; i < entry._valueIndex; i++) {
0834: if ((prefix == names[i].prefix || prefix
0835: .equals(names[i].prefix))
0836: && (namespaceURI == names[i].namespaceName || namespaceURI
0837: .equals(names[i].namespaceName))) {
0838: encodeNonZeroIntegerOnSecondBitFirstBitZero(names[i].index);
0839: return;
0840: }
0841: }
0842: }
0843:
0844: encodeLiteralAttributeQualifiedNameOnSecondBit(namespaceURI,
0845: prefix, localName, entry);
0846: }
0847:
0848: /**
0849: * Encode a literal qualified name of an Attribute Informaiton Item on the
0850: * third bit of an octet.
0851: * Implementation of clause C.17 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0852: *
0853: * @param namespaceURI the namespace URI of the qualified name.
0854: * @param prefix the prefix of the qualified name.
0855: * @param localName the local name of the qualified name.
0856: */
0857: protected final boolean encodeLiteralAttributeQualifiedNameOnSecondBit(
0858: String namespaceURI, String prefix, String localName,
0859: LocalNameQualifiedNamesMap.Entry entry) throws IOException {
0860: int namespaceURIIndex = KeyIntMap.NOT_PRESENT;
0861: int prefixIndex = KeyIntMap.NOT_PRESENT;
0862: if (namespaceURI != "") {
0863: namespaceURIIndex = _v.namespaceName.get(namespaceURI);
0864: if (namespaceURIIndex == KeyIntMap.NOT_PRESENT) {
0865: if (namespaceURI == EncodingConstants.XMLNS_NAMESPACE_NAME
0866: || namespaceURI
0867: .equals(EncodingConstants.XMLNS_NAMESPACE_NAME)) {
0868: return false;
0869: } else {
0870: throw new IOException(CommonResourceBundle
0871: .getInstance().getString(
0872: "message.namespaceURINotIndexed",
0873: new Object[] { namespaceURI }));
0874: }
0875: }
0876:
0877: if (prefix != "") {
0878: prefixIndex = _v.prefix.get(prefix);
0879: if (prefixIndex == KeyIntMap.NOT_PRESENT) {
0880: throw new IOException(CommonResourceBundle
0881: .getInstance().getString(
0882: "message.prefixNotIndexed",
0883: new Object[] { prefix }));
0884: }
0885: }
0886: }
0887:
0888: int localNameIndex = _v.localName.obtainIndex(localName);
0889:
0890: QualifiedName name = new QualifiedName(prefix, namespaceURI,
0891: localName, "", _v.attributeName.getNextIndex());
0892: entry.addQualifiedName(name);
0893:
0894: _b = EncodingConstants.ATTRIBUTE_LITERAL_QNAME_FLAG;
0895: if (namespaceURI != "") {
0896: _b |= EncodingConstants.LITERAL_QNAME_NAMESPACE_NAME_FLAG;
0897: if (prefix != "") {
0898: _b |= EncodingConstants.LITERAL_QNAME_PREFIX_FLAG;
0899: }
0900: }
0901:
0902: write(_b);
0903:
0904: if (namespaceURIIndex >= 0) {
0905: if (prefixIndex >= 0) {
0906: encodeNonZeroIntegerOnSecondBitFirstBitOne(prefixIndex);
0907: }
0908: encodeNonZeroIntegerOnSecondBitFirstBitOne(namespaceURIIndex);
0909: } else if (namespaceURI != "") {
0910: // XML prefix and namespace name
0911: encodeNonEmptyOctetStringOnSecondBit("xml");
0912: encodeNonEmptyOctetStringOnSecondBit("http://www.w3.org/XML/1998/namespace");
0913: }
0914:
0915: if (localNameIndex >= 0) {
0916: encodeNonZeroIntegerOnSecondBitFirstBitOne(localNameIndex);
0917: } else {
0918: encodeNonEmptyOctetStringOnSecondBit(localName);
0919: }
0920:
0921: return true;
0922: }
0923:
0924: /**
0925: * Encode a non identifying string on the first bit of an octet.
0926: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0927: *
0928: * @param s the string to encode
0929: * @param map the vocabulary table of strings to indexes.
0930: * @param addToTable true if the string should be added to the vocabulary
0931: * table (if not already present in the table).
0932: */
0933: protected final void encodeNonIdentifyingStringOnFirstBit(String s,
0934: StringIntMap map, boolean addToTable) throws IOException {
0935: if (s == null || s.length() == 0) {
0936: // C.26 an index (first bit '1') with seven '1' bits for an empty string
0937: write(0xFF);
0938: } else {
0939: if (addToTable) {
0940: int index = map.obtainIndex(s);
0941: if (index == KeyIntMap.NOT_PRESENT) {
0942: _b = EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG
0943: | _nonIdentifyingStringOnFirstBitCES;
0944: encodeNonEmptyCharacterStringOnFifthBit(s);
0945: } else {
0946: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
0947: }
0948: } else {
0949: _b = _nonIdentifyingStringOnFirstBitCES;
0950: encodeNonEmptyCharacterStringOnFifthBit(s);
0951: }
0952: }
0953: }
0954:
0955: /**
0956: * Encode a non identifying string on the first bit of an octet.
0957: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0958: *
0959: * @param s the string to encode
0960: * @param map the vocabulary table of character arrays to indexes.
0961: * @param addToTable true if the string should be added to the vocabulary
0962: * table (if not already present in the table).
0963: */
0964: protected final void encodeNonIdentifyingStringOnFirstBit(String s,
0965: CharArrayIntMap map, boolean addToTable) throws IOException {
0966: if (s == null || s.length() == 0) {
0967: // C.26 an index (first bit '1') with seven '1' bits for an empty string
0968: write(0xFF);
0969: } else {
0970: if (addToTable) {
0971: final char[] ch = s.toCharArray();
0972: final int length = s.length();
0973: int index = map.obtainIndex(ch, 0, length, false);
0974: if (index == KeyIntMap.NOT_PRESENT) {
0975: _b = EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG
0976: | _nonIdentifyingStringOnFirstBitCES;
0977: encodeNonEmptyCharacterStringOnFifthBit(ch, 0,
0978: length);
0979: } else {
0980: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
0981: }
0982: } else {
0983: _b = _nonIdentifyingStringOnFirstBitCES;
0984: encodeNonEmptyCharacterStringOnFifthBit(s);
0985: }
0986: }
0987: }
0988:
0989: /**
0990: * Encode a non identifying string on the first bit of an octet.
0991: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0992: *
0993: * @param ch the array of characters.
0994: * @param offset the offset into the array of characters.
0995: * @param length the length of characters.
0996: * @param map the vocabulary table of character arrays to indexes.
0997: * @param addToTable true if the string should be added to the vocabulary
0998: * table (if not already present in the table).
0999: * @param clone true if the array of characters should be cloned if added
1000: * to the vocabulary table.
1001: */
1002: protected final void encodeNonIdentifyingStringOnFirstBit(
1003: char[] ch, int offset, int length, CharArrayIntMap map,
1004: boolean addToTable, boolean clone) throws IOException {
1005: if (length == 0) {
1006: // C.26 an index (first bit '1') with seven '1' bits for an empty string
1007: write(0xFF);
1008: } else {
1009: if (addToTable) {
1010: int index = map.obtainIndex(ch, offset, length, clone);
1011: if (index == KeyIntMap.NOT_PRESENT) {
1012: _b = EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG
1013: | _nonIdentifyingStringOnFirstBitCES;
1014: encodeNonEmptyCharacterStringOnFifthBit(ch, offset,
1015: length);
1016: } else {
1017: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
1018: }
1019: } else {
1020: _b = _nonIdentifyingStringOnFirstBitCES;
1021: encodeNonEmptyCharacterStringOnFifthBit(ch, offset,
1022: length);
1023: }
1024: }
1025: }
1026:
1027: /**
1028: * Encode a non identifying string on the first bit of an octet as binary
1029: * data using an encoding algorithm.
1030: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1031: *
1032: * @param URI the encoding algorithm URI. If the URI == null then the
1033: * encoding algorithm identifier takes precendence.
1034: * @param id the encoding algorithm identifier.
1035: * @param data the data to be encoded using an encoding algorithm.
1036: * @throws EncodingAlgorithmException if the encoding algorithm URI is not
1037: * present in the vocabulary, or the encoding algorithm identifier
1038: * is not with the required range.
1039: */
1040: protected final void encodeNonIdentifyingStringOnFirstBit(
1041: String URI, int id, Object data)
1042: throws FastInfosetException, IOException {
1043: if (URI != null) {
1044: id = _v.encodingAlgorithm.get(URI);
1045: if (id == KeyIntMap.NOT_PRESENT) {
1046: throw new EncodingAlgorithmException(
1047: CommonResourceBundle.getInstance().getString(
1048: "message.EncodingAlgorithmURI",
1049: new Object[] { URI }));
1050: }
1051: id += EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START;
1052:
1053: EncodingAlgorithm ea = (EncodingAlgorithm) _registeredEncodingAlgorithms
1054: .get(URI);
1055: if (ea != null) {
1056: encodeAIIObjectAlgorithmData(id, data, ea);
1057: } else {
1058: if (data instanceof byte[]) {
1059: byte[] d = (byte[]) data;
1060: encodeAIIOctetAlgorithmData(id, d, 0, d.length);
1061: } else {
1062: throw new EncodingAlgorithmException(
1063: CommonResourceBundle
1064: .getInstance()
1065: .getString(
1066: "message.nullEncodingAlgorithmURI"));
1067: }
1068: }
1069: } else if (id <= EncodingConstants.ENCODING_ALGORITHM_BUILTIN_END) {
1070: BuiltInEncodingAlgorithm a = BuiltInEncodingAlgorithmFactory.table[id];
1071: int length = 0;
1072: switch (id) {
1073: case EncodingAlgorithmIndexes.HEXADECIMAL:
1074: length = ((byte[]) data).length;
1075: break;
1076: case EncodingAlgorithmIndexes.BASE64:
1077: length = ((byte[]) data).length;
1078: break;
1079: case EncodingAlgorithmIndexes.SHORT:
1080: length = ((short[]) data).length;
1081: break;
1082: case EncodingAlgorithmIndexes.INT:
1083: length = ((int[]) data).length;
1084: break;
1085: case EncodingAlgorithmIndexes.LONG:
1086: length = ((long[]) data).length;
1087: break;
1088: case EncodingAlgorithmIndexes.BOOLEAN:
1089: length = ((boolean[]) data).length;
1090: break;
1091: case EncodingAlgorithmIndexes.FLOAT:
1092: length = ((float[]) data).length;
1093: break;
1094: case EncodingAlgorithmIndexes.DOUBLE:
1095: length = ((double[]) data).length;
1096: break;
1097: case EncodingAlgorithmIndexes.UUID:
1098: length = ((long[]) data).length;
1099: break;
1100: case EncodingAlgorithmIndexes.CDATA:
1101: throw new UnsupportedOperationException(
1102: CommonResourceBundle.getInstance().getString(
1103: "message.CDATA"));
1104: default:
1105: throw new EncodingAlgorithmException(
1106: CommonResourceBundle.getInstance().getString(
1107: "message.UnsupportedBuiltInAlgorithm",
1108: new Object[] { new Integer(id) }));
1109: }
1110: encodeAIIBuiltInAlgorithmData(id, data, 0, length);
1111: } else if (id >= EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START) {
1112: if (data instanceof byte[]) {
1113: byte[] d = (byte[]) data;
1114: encodeAIIOctetAlgorithmData(id, d, 0, d.length);
1115: } else {
1116: throw new EncodingAlgorithmException(
1117: CommonResourceBundle.getInstance().getString(
1118: "message.nullEncodingAlgorithmURI"));
1119: }
1120: } else {
1121: throw new EncodingAlgorithmException(CommonResourceBundle
1122: .getInstance().getString(
1123: "message.identifiers10to31Reserved"));
1124: }
1125: }
1126:
1127: /**
1128: * Encode the [normalized value] of an Attribute Information Item using
1129: * using an encoding algorithm.
1130: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1131: *
1132: * @param id the encoding algorithm identifier.
1133: * @param d the data, as an array of bytes, to be encoded.
1134: * @param offset the offset into the array of bytes.
1135: * @param length the length of bytes.
1136: */
1137: protected final void encodeAIIOctetAlgorithmData(int id, byte[] d,
1138: int offset, int length) throws IOException {
1139: // Encode identification and top four bits of encoding algorithm id
1140: write(EncodingConstants.NISTRING_ENCODING_ALGORITHM_FLAG
1141: | ((id & 0xF0) >> 4));
1142:
1143: // Encode bottom 4 bits of enoding algorithm id
1144: _b = (id & 0x0F) << 4;
1145:
1146: // Encode the length
1147: encodeNonZeroOctetStringLengthOnFifthBit(length);
1148:
1149: write(d, offset, length);
1150: }
1151:
1152: /**
1153: * Encode the [normalized value] of an Attribute Information Item using
1154: * using an encoding algorithm.
1155: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1156: *
1157: * @param id the encoding algorithm identifier.
1158: * @param data the data to be encoded using an encoding algorithm.
1159: * @param ea the encoding algorithm to use to encode the data into an
1160: * array of bytes.
1161: */
1162: protected final void encodeAIIObjectAlgorithmData(int id,
1163: Object data, EncodingAlgorithm ea)
1164: throws FastInfosetException, IOException {
1165: // Encode identification and top four bits of encoding algorithm id
1166: write(EncodingConstants.NISTRING_ENCODING_ALGORITHM_FLAG
1167: | ((id & 0xF0) >> 4));
1168:
1169: // Encode bottom 4 bits of enoding algorithm id
1170: _b = (id & 0x0F) << 4;
1171:
1172: _encodingBufferOutputStream.reset();
1173: ea.encodeToOutputStream(data, _encodingBufferOutputStream);
1174: encodeNonZeroOctetStringLengthOnFifthBit(_encodingBufferIndex);
1175: write(_encodingBuffer, _encodingBufferIndex);
1176: }
1177:
1178: /**
1179: * Encode the [normalized value] of an Attribute Information Item using
1180: * using a built in encoding algorithm.
1181: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1182: *
1183: * @param id the built in encoding algorithm identifier.
1184: * @param data the data to be encoded using an encoding algorithm. The data
1185: * represents an array of items specified by the encoding algorithm
1186: * identifier
1187: * @param offset the offset into the array of bytes.
1188: * @param length the length of bytes.
1189: */
1190: protected final void encodeAIIBuiltInAlgorithmData(int id,
1191: Object data, int offset, int length) throws IOException {
1192: // Encode identification and top four bits of encoding algorithm id
1193: write(EncodingConstants.NISTRING_ENCODING_ALGORITHM_FLAG
1194: | ((id & 0xF0) >> 4));
1195:
1196: // Encode bottom 4 bits of enoding algorithm id
1197: _b = (id & 0x0F) << 4;
1198:
1199: final int octetLength = BuiltInEncodingAlgorithmFactory.table[id]
1200: .getOctetLengthFromPrimitiveLength(length);
1201:
1202: encodeNonZeroOctetStringLengthOnFifthBit(octetLength);
1203:
1204: ensureSize(octetLength);
1205: BuiltInEncodingAlgorithmFactory.table[id].encodeToBytes(data,
1206: offset, length, _octetBuffer, _octetBufferIndex);
1207: _octetBufferIndex += octetLength;
1208: }
1209:
1210: /**
1211: * Encode a non identifying string on the third bit of an octet.
1212: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1213: *
1214: * @param ch the array of characters.
1215: * @param offset the offset into the array of characters.
1216: * @param length the length of characters.
1217: * @param map the vocabulary table of character arrays to indexes.
1218: * @param addToTable true if the array of characters should be added to the vocabulary
1219: * table (if not already present in the table).
1220: * @param clone true if the array of characters should be cloned if added
1221: * to the vocabulary table.
1222: */
1223: protected final void encodeNonIdentifyingStringOnThirdBit(
1224: char[] ch, int offset, int length, CharArrayIntMap map,
1225: boolean addToTable, boolean clone) throws IOException {
1226: // length cannot be zero since sequence of CIIs has to be > 0
1227:
1228: if (addToTable) {
1229: int index = map.obtainIndex(ch, offset, length, clone);
1230: if (index == KeyIntMap.NOT_PRESENT) {
1231: _b = EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG
1232: | _nonIdentifyingStringOnThirdBitCES;
1233: encodeNonEmptyCharacterStringOnSeventhBit(ch, offset,
1234: length);
1235: } else {
1236: _b = EncodingConstants.CHARACTER_CHUNK | 0x20;
1237: encodeNonZeroIntegerOnFourthBit(index);
1238: }
1239: } else {
1240: _b = _nonIdentifyingStringOnThirdBitCES;
1241: encodeNonEmptyCharacterStringOnSeventhBit(ch, offset,
1242: length);
1243: }
1244: }
1245:
1246: /**
1247: * Encode a non identifying string on the third bit of an octet as binary
1248: * data using an encoding algorithm.
1249: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1250: *
1251: * @param URI the encoding algorithm URI. If the URI == null then the
1252: * encoding algorithm identifier takes precendence.
1253: * @param id the encoding algorithm identifier.
1254: * @param data the data to be encoded using an encoding algorithm.
1255: * @throws EncodingAlgorithmException if the encoding algorithm URI is not
1256: * present in the vocabulary, or the encoding algorithm identifier
1257: * is not with the required range.
1258: */
1259: protected final void encodeNonIdentifyingStringOnThirdBit(
1260: String URI, int id, Object data)
1261: throws FastInfosetException, IOException {
1262: if (URI != null) {
1263: id = _v.encodingAlgorithm.get(URI);
1264: if (id == KeyIntMap.NOT_PRESENT) {
1265: throw new EncodingAlgorithmException(
1266: CommonResourceBundle.getInstance().getString(
1267: "message.EncodingAlgorithmURI",
1268: new Object[] { URI }));
1269: }
1270: id += EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START;
1271:
1272: EncodingAlgorithm ea = (EncodingAlgorithm) _registeredEncodingAlgorithms
1273: .get(URI);
1274: if (ea != null) {
1275: encodeCIIObjectAlgorithmData(id, data, ea);
1276: } else {
1277: if (data instanceof byte[]) {
1278: byte[] d = (byte[]) data;
1279: encodeCIIOctetAlgorithmData(id, d, 0, d.length);
1280: } else {
1281: throw new EncodingAlgorithmException(
1282: CommonResourceBundle
1283: .getInstance()
1284: .getString(
1285: "message.nullEncodingAlgorithmURI"));
1286: }
1287: }
1288: } else if (id <= EncodingConstants.ENCODING_ALGORITHM_BUILTIN_END) {
1289: int length = 0;
1290: switch (id) {
1291: case EncodingAlgorithmIndexes.HEXADECIMAL:
1292: length = ((byte[]) data).length;
1293: break;
1294: case EncodingAlgorithmIndexes.BASE64:
1295: length = ((byte[]) data).length;
1296: break;
1297: case EncodingAlgorithmIndexes.SHORT:
1298: length = ((short[]) data).length;
1299: break;
1300: case EncodingAlgorithmIndexes.INT:
1301: length = ((int[]) data).length;
1302: break;
1303: case EncodingAlgorithmIndexes.LONG:
1304: length = ((int[]) data).length;
1305: break;
1306: case EncodingAlgorithmIndexes.BOOLEAN:
1307: length = ((boolean[]) data).length;
1308: break;
1309: case EncodingAlgorithmIndexes.FLOAT:
1310: length = ((float[]) data).length;
1311: break;
1312: case EncodingAlgorithmIndexes.DOUBLE:
1313: length = ((double[]) data).length;
1314: break;
1315: case EncodingAlgorithmIndexes.UUID:
1316: length = ((int[]) data).length;
1317: break;
1318: case EncodingAlgorithmIndexes.CDATA:
1319: throw new UnsupportedOperationException(
1320: CommonResourceBundle.getInstance().getString(
1321: "message.CDATA"));
1322: default:
1323: throw new EncodingAlgorithmException(
1324: CommonResourceBundle.getInstance().getString(
1325: "message.UnsupportedBuiltInAlgorithm",
1326: new Object[] { new Integer(id) }));
1327: }
1328: encodeCIIBuiltInAlgorithmData(id, data, 0, length);
1329: } else if (id >= EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START) {
1330: if (data instanceof byte[]) {
1331: byte[] d = (byte[]) data;
1332: encodeCIIOctetAlgorithmData(id, d, 0, d.length);
1333: } else {
1334: throw new EncodingAlgorithmException(
1335: CommonResourceBundle.getInstance().getString(
1336: "message.nullEncodingAlgorithmURI"));
1337: }
1338: } else {
1339: throw new EncodingAlgorithmException(CommonResourceBundle
1340: .getInstance().getString(
1341: "message.identifiers10to31Reserved"));
1342: }
1343: }
1344:
1345: /**
1346: * Encode a non identifying string on the third bit of an octet as binary
1347: * data using an encoding algorithm.
1348: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1349: *
1350: * @param URI the encoding algorithm URI. If the URI == null then the
1351: * encoding algorithm identifier takes precendence.
1352: * @param id the encoding algorithm identifier.
1353: * @param d the data, as an array of bytes, to be encoded.
1354: * @param offset the offset into the array of bytes.
1355: * @param length the length of bytes.
1356: * @throws EncodingAlgorithmException if the encoding algorithm URI is not
1357: * present in the vocabulary.
1358: */
1359: protected final void encodeNonIdentifyingStringOnThirdBit(
1360: String URI, int id, byte[] d, int offset, int length)
1361: throws FastInfosetException, IOException {
1362: if (URI != null) {
1363: id = _v.encodingAlgorithm.get(URI);
1364: if (id == KeyIntMap.NOT_PRESENT) {
1365: throw new EncodingAlgorithmException(
1366: CommonResourceBundle.getInstance().getString(
1367: "message.EncodingAlgorithmURI",
1368: new Object[] { URI }));
1369: }
1370: id += EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START;
1371: }
1372:
1373: encodeCIIOctetAlgorithmData(id, d, offset, length);
1374: }
1375:
1376: /**
1377: * Encode a chunk of Character Information Items using
1378: * using an encoding algorithm.
1379: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1380: *
1381: * @param id the encoding algorithm identifier.
1382: * @param d the data, as an array of bytes, to be encoded.
1383: * @param offset the offset into the array of bytes.
1384: * @param length the length of bytes.
1385: */
1386: protected final void encodeCIIOctetAlgorithmData(int id, byte[] d,
1387: int offset, int length) throws IOException {
1388: // Encode identification and top two bits of encoding algorithm id
1389: write(EncodingConstants.CHARACTER_CHUNK
1390: | EncodingConstants.CHARACTER_CHUNK_ENCODING_ALGORITHM_FLAG
1391: | ((id & 0xC0) >> 6));
1392:
1393: // Encode bottom 6 bits of enoding algorithm id
1394: _b = (id & 0x3F) << 2;
1395:
1396: // Encode the length
1397: encodeNonZeroOctetStringLengthOnSenventhBit(length);
1398:
1399: write(d, offset, length);
1400: }
1401:
1402: /**
1403: * Encode a chunk of Character Information Items using
1404: * using an encoding algorithm.
1405: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1406: *
1407: * @param id the encoding algorithm identifier.
1408: * @param data the data to be encoded using an encoding algorithm.
1409: * @param ea the encoding algorithm to use to encode the data into an
1410: * array of bytes.
1411: */
1412: protected final void encodeCIIObjectAlgorithmData(int id,
1413: Object data, EncodingAlgorithm ea)
1414: throws FastInfosetException, IOException {
1415: // Encode identification and top two bits of encoding algorithm id
1416: write(EncodingConstants.CHARACTER_CHUNK
1417: | EncodingConstants.CHARACTER_CHUNK_ENCODING_ALGORITHM_FLAG
1418: | ((id & 0xC0) >> 6));
1419:
1420: // Encode bottom 6 bits of enoding algorithm id
1421: _b = (id & 0x3F) << 2;
1422:
1423: _encodingBufferOutputStream.reset();
1424: ea.encodeToOutputStream(data, _encodingBufferOutputStream);
1425: encodeNonZeroOctetStringLengthOnSenventhBit(_encodingBufferIndex);
1426: write(_encodingBuffer, _encodingBufferIndex);
1427: }
1428:
1429: /**
1430: * Encode a chunk of Character Information Items using
1431: * using an encoding algorithm.
1432: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1433: *
1434: * @param id the built in encoding algorithm identifier.
1435: * @param data the data to be encoded using an encoding algorithm. The data
1436: * represents an array of items specified by the encoding algorithm
1437: * identifier
1438: * @param offset the offset into the array of bytes.
1439: * @param length the length of bytes.
1440: */
1441: protected final void encodeCIIBuiltInAlgorithmData(int id,
1442: Object data, int offset, int length)
1443: throws FastInfosetException, IOException {
1444: // Encode identification and top two bits of encoding algorithm id
1445: write(EncodingConstants.CHARACTER_CHUNK
1446: | EncodingConstants.CHARACTER_CHUNK_ENCODING_ALGORITHM_FLAG
1447: | ((id & 0xC0) >> 6));
1448:
1449: // Encode bottom 6 bits of enoding algorithm id
1450: _b = (id & 0x3F) << 2;
1451:
1452: final int octetLength = BuiltInEncodingAlgorithmFactory.table[id]
1453: .getOctetLengthFromPrimitiveLength(length);
1454:
1455: encodeNonZeroOctetStringLengthOnSenventhBit(octetLength);
1456:
1457: ensureSize(octetLength);
1458: BuiltInEncodingAlgorithmFactory.table[id].encodeToBytes(data,
1459: offset, length, _octetBuffer, _octetBufferIndex);
1460: _octetBufferIndex += octetLength;
1461: }
1462:
1463: /**
1464: * Encode a chunk of Character Information Items using
1465: * using the CDATA built in encoding algorithm.
1466: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1467: *
1468: * @param ch the array of characters.
1469: * @param offset the offset into the array of characters.
1470: * @param length the length of characters.
1471: */
1472: protected final void encodeCIIBuiltInAlgorithmDataAsCDATA(
1473: char[] ch, int offset, int length)
1474: throws FastInfosetException, IOException {
1475: // Encode identification and top two bits of encoding algorithm id
1476: write(EncodingConstants.CHARACTER_CHUNK
1477: | EncodingConstants.CHARACTER_CHUNK_ENCODING_ALGORITHM_FLAG);
1478:
1479: // Encode bottom 6 bits of enoding algorithm id
1480: _b = EncodingAlgorithmIndexes.CDATA << 2;
1481:
1482: length = encodeUTF8String(ch, offset, length);
1483: encodeNonZeroOctetStringLengthOnSenventhBit(length);
1484: write(_encodingBuffer, length);
1485: }
1486:
1487: /**
1488: * Encode a non empty identifying string on the first bit of an octet.
1489: * Implementation of clause C.13 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1490: *
1491: * @param s the identifying string.
1492: * @param map the vocabulary table to use to determin the index of the
1493: * identifying string
1494: */
1495: protected final void encodeIdentifyingNonEmptyStringOnFirstBit(
1496: String s, StringIntMap map) throws IOException {
1497: int index = map.obtainIndex(s);
1498: if (index == KeyIntMap.NOT_PRESENT) {
1499: // _b = 0;
1500: encodeNonEmptyOctetStringOnSecondBit(s);
1501: } else {
1502: // _b = 0x80;
1503: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
1504: }
1505: }
1506:
1507: /**
1508: * Encode a non empty string on the second bit of an octet using the UTF-8
1509: * encoding.
1510: * Implementation of clause C.22 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1511: *
1512: * @param s the string.
1513: */
1514: protected final void encodeNonEmptyOctetStringOnSecondBit(String s)
1515: throws IOException {
1516: final int length = encodeUTF8String(s);
1517: encodeNonZeroOctetStringLengthOnSecondBit(length);
1518: write(_encodingBuffer, length);
1519: }
1520:
1521: /**
1522: * Encode the length of a UTF-8 encoded string on the second bit of an octet.
1523: * Implementation of clause C.22 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1524: *
1525: * @param length the length to encode.
1526: */
1527: protected final void encodeNonZeroOctetStringLengthOnSecondBit(
1528: int length) throws IOException {
1529: if (length < EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_SMALL_LIMIT) {
1530: // [1, 64]
1531: write(length - 1);
1532: } else if (length < EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_LIMIT) {
1533: // [65, 320]
1534: write(EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_FLAG); // 010 00000
1535: write(length
1536: - EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_SMALL_LIMIT);
1537: } else {
1538: // [321, 4294967296]
1539: write(EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_LARGE_FLAG); // 0110 0000
1540: length -= EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_LIMIT;
1541: write(length >>> 24);
1542: write((length >> 16) & 0xFF);
1543: write((length >> 8) & 0xFF);
1544: write(length & 0xFF);
1545: }
1546: }
1547:
1548: /**
1549: * Encode a non empty string on the fifth bit of an octet using the UTF-8
1550: * or UTF-16 encoding.
1551: * Implementation of clause C.23 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1552: *
1553: * @param s the string.
1554: */
1555: protected final void encodeNonEmptyCharacterStringOnFifthBit(
1556: String s) throws IOException {
1557: final int length = (_encodingStringsAsUtf8) ? encodeUTF8String(s)
1558: : encodeUtf16String(s);
1559: encodeNonZeroOctetStringLengthOnFifthBit(length);
1560: write(_encodingBuffer, length);
1561: }
1562:
1563: /**
1564: * Encode a non empty string on the fifth bit of an octet using the UTF-8
1565: * or UTF-16 encoding.
1566: * Implementation of clause C.23 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1567: *
1568: * @param ch the array of characters.
1569: * @param offset the offset into the array of characters.
1570: * @param length the length of characters.
1571: */
1572: protected final void encodeNonEmptyCharacterStringOnFifthBit(
1573: char[] ch, int offset, int length) throws IOException {
1574: length = (_encodingStringsAsUtf8) ? encodeUTF8String(ch,
1575: offset, length) : encodeUtf16String(ch, offset, length);
1576: encodeNonZeroOctetStringLengthOnFifthBit(length);
1577: write(_encodingBuffer, length);
1578: }
1579:
1580: /**
1581: * Encode the length of a UTF-8 or UTF-16 encoded string on the fifth bit
1582: * of an octet.
1583: * Implementation of clause C.23 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1584: *
1585: * @param length the length to encode.
1586: */
1587: protected final void encodeNonZeroOctetStringLengthOnFifthBit(
1588: int length) throws IOException {
1589: if (length < EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_LIMIT) {
1590: // [1, 8]
1591: write(_b | (length - 1));
1592: } else if (length < EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_LIMIT) {
1593: // [9, 264]
1594: write(_b
1595: | EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_FLAG); // 000010 00
1596: write(length
1597: - EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_LIMIT);
1598: } else {
1599: // [265, 4294967296]
1600: write(_b
1601: | EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_LARGE_FLAG); // 000011 00
1602: length -= EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_LIMIT;
1603: write(length >>> 24);
1604: write((length >> 16) & 0xFF);
1605: write((length >> 8) & 0xFF);
1606: write(length & 0xFF);
1607: }
1608: }
1609:
1610: /**
1611: * Encode a non empty string on the seventh bit of an octet using the UTF-8
1612: * or UTF-16 encoding.
1613: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1614: *
1615: * @param ch the array of characters.
1616: * @param offset the offset into the array of characters.
1617: * @param length the length of characters.
1618: */
1619: protected final void encodeNonEmptyCharacterStringOnSeventhBit(
1620: char[] ch, int offset, int length) throws IOException {
1621: length = (_encodingStringsAsUtf8) ? encodeUTF8String(ch,
1622: offset, length) : encodeUtf16String(ch, offset, length);
1623: encodeNonZeroOctetStringLengthOnSenventhBit(length);
1624: write(_encodingBuffer, length);
1625: }
1626:
1627: /**
1628: * Encode a non empty string on the seventh bit of an octet using a restricted
1629: * alphabet that results in the encoding of a character in 4 bits
1630: * (or two characters per octet).
1631: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1632: *
1633: * @param table the table mapping characters to 4 bit values.
1634: * @param ch the array of characters.
1635: * @param offset the offset into the array of characters.
1636: * @param length the length of characters.
1637: */
1638: protected final void encodeNonEmptyFourBitCharacterStringOnSeventhBit(
1639: int[] table, char[] ch, int offset, int length)
1640: throws FastInfosetException, IOException {
1641: final int octetPairLength = length / 2;
1642: final int octetSingleLength = length % 2;
1643:
1644: // Encode the length
1645: encodeNonZeroOctetStringLengthOnSenventhBit(octetPairLength
1646: + octetSingleLength);
1647:
1648: ensureSize(octetPairLength + octetSingleLength);
1649: // Encode all pairs
1650: int v = 0;
1651: for (int i = 0; i < octetPairLength; i++) {
1652: v = (table[ch[offset++]] << 4) | table[ch[offset++]];
1653: if (v < 0) {
1654: throw new FastInfosetException(CommonResourceBundle
1655: .getInstance().getString(
1656: "message.characterOutofAlphabetRange"));
1657: }
1658: _octetBuffer[_octetBufferIndex++] = (byte) v;
1659: }
1660: // Encode single character at end with termination bits
1661: if (octetSingleLength == 1) {
1662: v = (table[ch[offset]] << 4) | 0x0F;
1663: if (v < 0) {
1664: throw new FastInfosetException(CommonResourceBundle
1665: .getInstance().getString(
1666: "message.characterOutofAlphabetRange"));
1667: }
1668: _octetBuffer[_octetBufferIndex++] = (byte) v;
1669: }
1670: }
1671:
1672: /**
1673: * Encode a non empty string on the seventh bit of an octet using a restricted
1674: * alphabet table.
1675: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1676: *
1677: * @param alphabet the alphabet defining the mapping between characters and
1678: * integer values.
1679: * @param ch the array of characters.
1680: * @param offset the offset into the array of characters.
1681: * @param length the length of characters.
1682: */
1683: protected final void encodeNonEmptyNBitCharacterStringOnSeventhBit(
1684: String alphabet, char[] ch, int offset, int length)
1685: throws FastInfosetException, IOException {
1686: int bitsPerCharacter = 1;
1687: while ((1 << bitsPerCharacter) <= alphabet.length()) {
1688: bitsPerCharacter++;
1689: }
1690: final int terminatingValue = (1 << bitsPerCharacter) - 1;
1691:
1692: final int bits = length * bitsPerCharacter;
1693: final int octets = bits / 8;
1694: final int bitsOfLastOctet = bits % 8;
1695: final int totalOctets = octets
1696: + ((bitsOfLastOctet > 0) ? 1 : 0);
1697:
1698: // Encode the length
1699: encodeNonZeroOctetStringLengthOnSenventhBit(totalOctets);
1700:
1701: resetBits();
1702: ensureSize(totalOctets);
1703: int v = 0;
1704: for (int i = 0; i < length; i++) {
1705: final char c = ch[offset + i];
1706: // This is grotesquely slow, need to use hash table of character to int value
1707: for (v = 0; v < alphabet.length(); v++) {
1708: if (c == alphabet.charAt(v)) {
1709: break;
1710: }
1711: }
1712: if (v == alphabet.length()) {
1713: throw new FastInfosetException(CommonResourceBundle
1714: .getInstance().getString(
1715: "message.characterOutofAlphabetRange"));
1716: }
1717: writeBits(bitsPerCharacter, v);
1718: }
1719:
1720: if (bitsOfLastOctet > 0) {
1721: _b |= (1 << (8 - bitsOfLastOctet)) - 1;
1722: write(_b);
1723: }
1724: }
1725:
1726: private int _bitsLeftInOctet;
1727:
1728: private final void resetBits() {
1729: _bitsLeftInOctet = 8;
1730: _b = 0;
1731: }
1732:
1733: private final void writeBits(int bits, int v) throws IOException {
1734: while (bits > 0) {
1735: final int bit = (v & (1 << --bits)) > 0 ? 1 : 0;
1736: _b |= bit << (--_bitsLeftInOctet);
1737: if (_bitsLeftInOctet == 0) {
1738: write(_b);
1739: _bitsLeftInOctet = 8;
1740: _b = 0;
1741: }
1742: }
1743: }
1744:
1745: /**
1746: * Encode the length of a encoded string on the seventh bit
1747: * of an octet.
1748: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1749: *
1750: * @param length the length to encode.
1751: */
1752: protected final void encodeNonZeroOctetStringLengthOnSenventhBit(
1753: int length) throws IOException {
1754: if (length < EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_LIMIT) {
1755: // [1, 2]
1756: write(_b | (length - 1));
1757: } else if (length < EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_LIMIT) {
1758: // [3, 258]
1759: write(_b
1760: | EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_FLAG); // 00000010
1761: write(length
1762: - EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_LIMIT);
1763: } else {
1764: // [259, 4294967296]
1765: write(_b
1766: | EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_LARGE_FLAG); // 00000011
1767: length -= EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_LIMIT;
1768: write(length >>> 24);
1769: write((length >> 16) & 0xFF);
1770: write((length >> 8) & 0xFF);
1771: write(length & 0xFF);
1772: }
1773: }
1774:
1775: /**
1776: * Encode a non zero integer on the second bit of an octet, setting
1777: * the first bit to 1.
1778: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1779: *
1780: * <p>
1781: * The first bit of the first octet is set, as specified in clause C.13 of
1782: * ITU-T Rec. X.891 | ISO/IEC 24824-1
1783: *
1784: * @param i The integer to encode, which is a member of the interval
1785: * [0, 1048575]. In the specification the interval is [1, 1048576]
1786: *
1787: */
1788: protected final void encodeNonZeroIntegerOnSecondBitFirstBitOne(
1789: int i) throws IOException {
1790: if (i < EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT) {
1791: // [1, 64] ( [0, 63] ) 6 bits
1792: write(0x80 | i);
1793: } else if (i < EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT) {
1794: // [65, 8256] ( [64, 8255] ) 13 bits
1795: i -= EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
1796: _b = (0x80 | EncodingConstants.INTEGER_2ND_BIT_MEDIUM_FLAG)
1797: | (i >> 8); // 010 00000
1798: // _b = 0xC0 | (i >> 8); // 010 00000
1799: write(_b);
1800: write(i & 0xFF);
1801: } else {
1802: // [8257, 1048576] ( [8256, 1048575] ) 20 bits
1803: i -= EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
1804: _b = (0x80 | EncodingConstants.INTEGER_2ND_BIT_LARGE_FLAG)
1805: | (i >> 16); // 0110 0000
1806: // _b = 0xE0 | (i >> 16); // 0110 0000
1807: write(_b);
1808: write((i >> 8) & 0xFF);
1809: write(i & 0xFF);
1810: }
1811: }
1812:
1813: /**
1814: * Encode a non zero integer on the second bit of an octet, setting
1815: * the first bit to 0.
1816: * Implementation of clause C.25 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1817: *
1818: * <p>
1819: * The first bit of the first octet is set, as specified in clause C.13 of
1820: * ITU-T Rec. X.891 | ISO/IEC 24824-1
1821: *
1822: * @param i The integer to encode, which is a member of the interval
1823: * [0, 1048575]. In the specification the interval is [1, 1048576]
1824: *
1825: */
1826: protected final void encodeNonZeroIntegerOnSecondBitFirstBitZero(
1827: int i) throws IOException {
1828: if (i < EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT) {
1829: // [1, 64] ( [0, 63] ) 6 bits
1830: write(i);
1831: } else if (i < EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT) {
1832: // [65, 8256] ( [64, 8255] ) 13 bits
1833: i -= EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
1834: _b = EncodingConstants.INTEGER_2ND_BIT_MEDIUM_FLAG
1835: | (i >> 8); // 010 00000
1836: write(_b);
1837: write(i & 0xFF);
1838: } else {
1839: // [8257, 1048576] ( [8256, 1048575] ) 20 bits
1840: i -= EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
1841: _b = EncodingConstants.INTEGER_2ND_BIT_LARGE_FLAG
1842: | (i >> 16); // 0110 0000
1843: write(_b);
1844: write((i >> 8) & 0xFF);
1845: write(i & 0xFF);
1846: }
1847: }
1848:
1849: /**
1850: * Encode a non zero integer on the third bit of an octet.
1851: * Implementation of clause C.27 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1852: *
1853: * @param i The integer to encode, which is a member of the interval
1854: * [0, 1048575]. In the specification the interval is [1, 1048576]
1855: *
1856: */
1857: protected final void encodeNonZeroIntegerOnThirdBit(int i)
1858: throws IOException {
1859: if (i < EncodingConstants.INTEGER_3RD_BIT_SMALL_LIMIT) {
1860: // [1, 32] ( [0, 31] ) 5 bits
1861: write(_b | i);
1862: } else if (i < EncodingConstants.INTEGER_3RD_BIT_MEDIUM_LIMIT) {
1863: // [33, 2080] ( [32, 2079] ) 11 bits
1864: i -= EncodingConstants.INTEGER_3RD_BIT_SMALL_LIMIT;
1865: _b |= EncodingConstants.INTEGER_3RD_BIT_MEDIUM_FLAG
1866: | (i >> 8); // 00100 000
1867: write(_b);
1868: write(i & 0xFF);
1869: } else if (i < EncodingConstants.INTEGER_3RD_BIT_LARGE_LIMIT) {
1870: // [2081, 526368] ( [2080, 526367] ) 19 bits
1871: i -= EncodingConstants.INTEGER_3RD_BIT_MEDIUM_LIMIT;
1872: _b |= EncodingConstants.INTEGER_3RD_BIT_LARGE_FLAG
1873: | (i >> 16); // 00101 000
1874: write(_b);
1875: write((i >> 8) & 0xFF);
1876: write(i & 0xFF);
1877: } else {
1878: // [526369, 1048576] ( [526368, 1048575] ) 20 bits
1879: i -= EncodingConstants.INTEGER_3RD_BIT_LARGE_LIMIT;
1880: _b |= EncodingConstants.INTEGER_3RD_BIT_LARGE_LARGE_FLAG; // 00110 000
1881: write(_b);
1882: write(i >> 16);
1883: write((i >> 8) & 0xFF);
1884: write(i & 0xFF);
1885: }
1886: }
1887:
1888: /**
1889: * Encode a non zero integer on the fourth bit of an octet.
1890: * Implementation of clause C.28 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1891: *
1892: * @param i The integer to encode, which is a member of the interval
1893: * [0, 1048575]. In the specification the interval is [1, 1048576]
1894: *
1895: */
1896: protected final void encodeNonZeroIntegerOnFourthBit(int i)
1897: throws IOException {
1898: if (i < EncodingConstants.INTEGER_4TH_BIT_SMALL_LIMIT) {
1899: // [1, 16] ( [0, 15] ) 4 bits
1900: write(_b | i);
1901: } else if (i < EncodingConstants.INTEGER_4TH_BIT_MEDIUM_LIMIT) {
1902: // [17, 1040] ( [16, 1039] ) 10 bits
1903: i -= EncodingConstants.INTEGER_4TH_BIT_SMALL_LIMIT;
1904: _b |= EncodingConstants.INTEGER_4TH_BIT_MEDIUM_FLAG
1905: | (i >> 8); // 000 100 00
1906: write(_b);
1907: write(i & 0xFF);
1908: } else if (i < EncodingConstants.INTEGER_4TH_BIT_LARGE_LIMIT) {
1909: // [1041, 263184] ( [1040, 263183] ) 18 bits
1910: i -= EncodingConstants.INTEGER_4TH_BIT_MEDIUM_LIMIT;
1911: _b |= EncodingConstants.INTEGER_4TH_BIT_LARGE_FLAG
1912: | (i >> 16); // 000 101 00
1913: write(_b);
1914: write((i >> 8) & 0xFF);
1915: write(i & 0xFF);
1916: } else {
1917: // [263185, 1048576] ( [263184, 1048575] ) 20 bits
1918: i -= EncodingConstants.INTEGER_4TH_BIT_LARGE_LIMIT;
1919: _b |= EncodingConstants.INTEGER_4TH_BIT_LARGE_LARGE_FLAG; // 000 110 00
1920: write(_b);
1921: write(i >> 16);
1922: write((i >> 8) & 0xFF);
1923: write(i & 0xFF);
1924: }
1925: }
1926:
1927: /**
1928: * Encode a non empty string using the UTF-8 encoding.
1929: *
1930: * @param b the current octet that is being written.
1931: * @param s the string to be UTF-8 encoded.
1932: * @param constants the array of constants to use when encoding to determin
1933: * how the length of the UTF-8 encoded string is encoded.
1934: */
1935: protected final void encodeNonEmptyUTF8StringAsOctetString(int b,
1936: String s, int[] constants) throws IOException {
1937: final char[] ch = s.toCharArray();
1938: encodeNonEmptyUTF8StringAsOctetString(b, ch, 0, ch.length,
1939: constants);
1940: }
1941:
1942: /**
1943: * Encode a non empty string using the UTF-8 encoding.
1944: *
1945: * @param b the current octet that is being written.
1946: * @param ch the array of characters.
1947: * @param offset the offset into the array of characters.
1948: * @param length the length of characters.
1949: * how the length of the UTF-8 encoded string is encoded.
1950: * @param constants the array of constants to use when encoding to determin
1951: * how the length of the UTF-8 encoded string is encoded.
1952: */
1953: protected final void encodeNonEmptyUTF8StringAsOctetString(int b,
1954: char ch[], int offset, int length, int[] constants)
1955: throws IOException {
1956: length = encodeUTF8String(ch, offset, length);
1957: encodeNonZeroOctetStringLength(b, length, constants);
1958: write(_encodingBuffer, length);
1959: }
1960:
1961: /**
1962: * Encode the length of non empty UTF-8 encoded string.
1963: *
1964: * @param b the current octet that is being written.
1965: * @param length the length of the UTF-8 encoded string.
1966: * how the length of the UTF-8 encoded string is encoded.
1967: * @param constants the array of constants to use when encoding to determin
1968: * how the length of the UTF-8 encoded string is encoded.
1969: */
1970: protected final void encodeNonZeroOctetStringLength(int b,
1971: int length, int[] constants) throws IOException {
1972: if (length < constants[EncodingConstants.OCTET_STRING_LENGTH_SMALL_LIMIT]) {
1973: write(b | (length - 1));
1974: } else if (length < constants[EncodingConstants.OCTET_STRING_LENGTH_MEDIUM_LIMIT]) {
1975: write(b
1976: | constants[EncodingConstants.OCTET_STRING_LENGTH_MEDIUM_FLAG]);
1977: write(length
1978: - constants[EncodingConstants.OCTET_STRING_LENGTH_SMALL_LIMIT]);
1979: } else {
1980: write(b
1981: | constants[EncodingConstants.OCTET_STRING_LENGTH_LARGE_FLAG]);
1982: length -= constants[EncodingConstants.OCTET_STRING_LENGTH_MEDIUM_LIMIT];
1983: write(length >>> 24);
1984: write((length >> 16) & 0xFF);
1985: write((length >> 8) & 0xFF);
1986: write(length & 0xFF);
1987: }
1988: }
1989:
1990: /**
1991: * Encode a non zero integer.
1992: *
1993: * @param b the current octet that is being written.
1994: * @param i the non zero integer.
1995: * @param constants the array of constants to use when encoding to determin
1996: * how the non zero integer is encoded.
1997: */
1998: protected final void encodeNonZeroInteger(int b, int i,
1999: int[] constants) throws IOException {
2000: if (i < constants[EncodingConstants.INTEGER_SMALL_LIMIT]) {
2001: write(b | i);
2002: } else if (i < constants[EncodingConstants.INTEGER_MEDIUM_LIMIT]) {
2003: i -= constants[EncodingConstants.INTEGER_SMALL_LIMIT];
2004: write(b | constants[EncodingConstants.INTEGER_MEDIUM_FLAG]
2005: | (i >> 8));
2006: write(i & 0xFF);
2007: } else if (i < constants[EncodingConstants.INTEGER_LARGE_LIMIT]) {
2008: i -= constants[EncodingConstants.INTEGER_MEDIUM_LIMIT];
2009: write(b | constants[EncodingConstants.INTEGER_LARGE_FLAG]
2010: | (i >> 16));
2011: write((i >> 8) & 0xFF);
2012: write(i & 0xFF);
2013: } else if (i < EncodingConstants.INTEGER_MAXIMUM_SIZE) {
2014: i -= constants[EncodingConstants.INTEGER_LARGE_LIMIT];
2015: write(b
2016: | constants[EncodingConstants.INTEGER_LARGE_LARGE_FLAG]);
2017: write(i >> 16);
2018: write((i >> 8) & 0xFF);
2019: write(i & 0xFF);
2020: } else {
2021: throw new IOException(
2022: CommonResourceBundle
2023: .getInstance()
2024: .getString(
2025: "message.integerMaxSize",
2026: new Object[] { new Integer(
2027: EncodingConstants.INTEGER_MAXIMUM_SIZE) }));
2028: }
2029: }
2030:
2031: /**
2032: * Mark the current position in the buffered stream.
2033: */
2034: protected final void mark() throws IOException {
2035: _markIndex = _octetBufferIndex;
2036: }
2037:
2038: /**
2039: * Reset the marked position in the buffered stream.
2040: */
2041: protected final void resetMark() throws IOException {
2042: _markIndex = -1;
2043: }
2044:
2045: /**
2046: * Write a byte to the buffered stream.
2047: */
2048: protected final void write(int i) throws IOException {
2049: if (_octetBufferIndex < _octetBuffer.length) {
2050: _octetBuffer[_octetBufferIndex++] = (byte) i;
2051: } else {
2052: if (_markIndex == -1) {
2053: _s.write(_octetBuffer);
2054: _octetBufferIndex = 1;
2055: _octetBuffer[0] = (byte) i;
2056: } else {
2057: resize(_octetBuffer.length * 3 / 2);
2058: _octetBuffer[_octetBufferIndex++] = (byte) i;
2059: }
2060: }
2061: }
2062:
2063: /**
2064: * Write an array of bytes to the buffered stream.
2065: *
2066: * @param b the array of bytes.
2067: * @param length the length of bytes.
2068: */
2069: protected final void write(byte[] b, int length) throws IOException {
2070: write(b, 0, length);
2071: }
2072:
2073: /**
2074: * Write an array of bytes to the buffered stream.
2075: *
2076: * @param b the array of bytes.
2077: * @param offset the offset into the array of bytes.
2078: * @param length the length of bytes.
2079: */
2080: protected final void write(byte[] b, int offset, int length)
2081: throws IOException {
2082: if ((_octetBufferIndex + length) < _octetBuffer.length) {
2083: System.arraycopy(b, offset, _octetBuffer,
2084: _octetBufferIndex, length);
2085: _octetBufferIndex += length;
2086: } else {
2087: if (_markIndex == -1) {
2088: _s.write(_octetBuffer, 0, _octetBufferIndex);
2089: _s.write(b, offset, length);
2090: _octetBufferIndex = 0;
2091: } else {
2092: resize((_octetBuffer.length + length) * 3 / 2 + 1);
2093: System.arraycopy(b, offset, _octetBuffer,
2094: _octetBufferIndex, length);
2095: _octetBufferIndex += length;
2096: }
2097: }
2098: }
2099:
2100: private void ensureSize(int length) {
2101: if ((_octetBufferIndex + length) > _octetBuffer.length) {
2102: resize((_octetBufferIndex + length) * 3 / 2 + 1);
2103: }
2104: }
2105:
2106: private void resize(int length) {
2107: byte[] b = new byte[length];
2108: System.arraycopy(_octetBuffer, 0, b, 0, _octetBufferIndex);
2109: _octetBuffer = b;
2110: }
2111:
2112: private void _flush() throws IOException {
2113: if (_octetBufferIndex > 0) {
2114: _s.write(_octetBuffer, 0, _octetBufferIndex);
2115: _octetBufferIndex = 0;
2116: }
2117: }
2118:
2119: private EncodingBufferOutputStream _encodingBufferOutputStream = new EncodingBufferOutputStream();
2120:
2121: private byte[] _encodingBuffer = new byte[512];
2122:
2123: private int _encodingBufferIndex;
2124:
2125: private class EncodingBufferOutputStream extends OutputStream {
2126:
2127: public void write(int b) throws IOException {
2128: if (_encodingBufferIndex < _encodingBuffer.length) {
2129: _encodingBuffer[_encodingBufferIndex++] = (byte) b;
2130: } else {
2131: byte newbuf[] = new byte[Math.max(
2132: _encodingBuffer.length << 1,
2133: _encodingBufferIndex)];
2134: System.arraycopy(_encodingBuffer, 0, newbuf, 0,
2135: _encodingBufferIndex);
2136: _encodingBuffer = newbuf;
2137:
2138: _encodingBuffer[_encodingBufferIndex++] = (byte) b;
2139: }
2140: }
2141:
2142: public void write(byte b[], int off, int len)
2143: throws IOException {
2144: if ((off < 0) || (off > b.length) || (len < 0)
2145: || ((off + len) > b.length) || ((off + len) < 0)) {
2146: throw new IndexOutOfBoundsException();
2147: } else if (len == 0) {
2148: return;
2149: }
2150: final int newoffset = _encodingBufferIndex + len;
2151: if (newoffset > _encodingBuffer.length) {
2152: byte newbuf[] = new byte[Math.max(
2153: _encodingBuffer.length << 1, newoffset)];
2154: System.arraycopy(_encodingBuffer, 0, newbuf, 0,
2155: _encodingBufferIndex);
2156: _encodingBuffer = newbuf;
2157: }
2158: System.arraycopy(b, off, _encodingBuffer,
2159: _encodingBufferIndex, len);
2160: _encodingBufferIndex = newoffset;
2161: }
2162:
2163: public int getLength() {
2164: return _encodingBufferIndex;
2165: }
2166:
2167: public void reset() {
2168: _encodingBufferIndex = 0;
2169: }
2170: }
2171:
2172: /**
2173: * Encode a string using the UTF-8 encoding.
2174: *
2175: * @param s the string to encode.
2176: */
2177: protected final int encodeUTF8String(String s) throws IOException {
2178: final int length = s.length();
2179: if (length < _charBuffer.length) {
2180: s.getChars(0, length, _charBuffer, 0);
2181: return encodeUTF8String(_charBuffer, 0, length);
2182: } else {
2183: char[] ch = s.toCharArray();
2184: return encodeUTF8String(ch, 0, length);
2185: }
2186: }
2187:
2188: private void ensureEncodingBufferSizeForUtf8String(int length) {
2189: final int newLength = 4 * length;
2190: if (_encodingBuffer.length < newLength) {
2191: _encodingBuffer = new byte[newLength];
2192: }
2193: }
2194:
2195: /**
2196: * Encode a string using the UTF-8 encoding.
2197: *
2198: * @param ch the array of characters.
2199: * @param offset the offset into the array of characters.
2200: * @param length the length of characters.
2201: */
2202: protected final int encodeUTF8String(char[] ch, int offset,
2203: int length) throws IOException {
2204: int bpos = 0;
2205:
2206: // Make sure buffer is large enough
2207: ensureEncodingBufferSizeForUtf8String(length);
2208:
2209: final int end = offset + length;
2210: int c;
2211: while (end != offset) {
2212: c = ch[offset++];
2213: if (c < 0x80) {
2214: // 1 byte, 7 bits
2215: _encodingBuffer[bpos++] = (byte) c;
2216: } else if (c < 0x800) {
2217: // 2 bytes, 11 bits
2218: _encodingBuffer[bpos++] = (byte) (0xC0 | (c >> 6)); // first 5
2219: _encodingBuffer[bpos++] = (byte) (0x80 | (c & 0x3F)); // second 6
2220: } else if (c <= '\uFFFF') {
2221: if (!XMLChar.isHighSurrogate(c)
2222: && !XMLChar.isLowSurrogate(c)) {
2223: // 3 bytes, 16 bits
2224: _encodingBuffer[bpos++] = (byte) (0xE0 | (c >> 12)); // first 4
2225: _encodingBuffer[bpos++] = (byte) (0x80 | ((c >> 6) & 0x3F)); // second 6
2226: _encodingBuffer[bpos++] = (byte) (0x80 | (c & 0x3F)); // third 6
2227: } else {
2228: // 4 bytes, high and low surrogate
2229: encodeCharacterAsUtf8FourByte(c, ch, offset, end,
2230: bpos);
2231: bpos += 4;
2232: offset++;
2233: }
2234: }
2235: }
2236:
2237: return bpos;
2238: }
2239:
2240: private void encodeCharacterAsUtf8FourByte(int c, char[] ch,
2241: int chpos, int chend, int bpos) throws IOException {
2242: if (chpos == chend) {
2243: throw new IOException("");
2244: }
2245:
2246: final char d = ch[chpos];
2247: if (!XMLChar.isLowSurrogate(d)) {
2248: throw new IOException("");
2249: }
2250:
2251: final int uc = (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
2252: if (uc < 0 || uc >= 0x200000) {
2253: throw new IOException("");
2254: }
2255:
2256: _encodingBuffer[bpos++] = (byte) (0xF0 | ((uc >> 18)));
2257: _encodingBuffer[bpos++] = (byte) (0x80 | ((uc >> 12) & 0x3F));
2258: _encodingBuffer[bpos++] = (byte) (0x80 | ((uc >> 6) & 0x3F));
2259: _encodingBuffer[bpos++] = (byte) (0x80 | (uc & 0x3F));
2260: }
2261:
2262: /**
2263: * Encode a string using the UTF-16 encoding.
2264: *
2265: * @param s the string to encode.
2266: */
2267: protected final int encodeUtf16String(String s) throws IOException {
2268: final int length = s.length();
2269: if (length < _charBuffer.length) {
2270: s.getChars(0, length, _charBuffer, 0);
2271: return encodeUtf16String(_charBuffer, 0, length);
2272: } else {
2273: char[] ch = s.toCharArray();
2274: return encodeUtf16String(ch, 0, length);
2275: }
2276: }
2277:
2278: private void ensureEncodingBufferSizeForUtf16String(int length) {
2279: final int newLength = 2 * length;
2280: if (_encodingBuffer.length < newLength) {
2281: _encodingBuffer = new byte[newLength];
2282: }
2283: }
2284:
2285: /**
2286: * Encode a string using the UTF-16 encoding.
2287: *
2288: * @param ch the array of characters.
2289: * @param offset the offset into the array of characters.
2290: * @param length the length of characters.
2291: */
2292: protected final int encodeUtf16String(char[] ch, int offset,
2293: int length) throws IOException {
2294: int byteLength = 0;
2295:
2296: // Make sure buffer is large enough
2297: ensureEncodingBufferSizeForUtf16String(length);
2298:
2299: final int n = offset + length;
2300: for (int i = offset; i < n; i++) {
2301: final int c = (int) ch[i];
2302: _encodingBuffer[byteLength++] = (byte) (c >> 8);
2303: _encodingBuffer[byteLength++] = (byte) (c & 0xFF);
2304: }
2305:
2306: return byteLength;
2307: }
2308:
2309: /**
2310: * Obtain the prefix from a qualified name.
2311: *
2312: * @param qName the qualified name
2313: * @return the prefix, or "" if there is no prefix.
2314: */
2315: public static String getPrefixFromQualifiedName(String qName) {
2316: int i = qName.indexOf(':');
2317: String prefix = "";
2318: if (i != -1) {
2319: prefix = qName.substring(0, i);
2320: }
2321: return prefix;
2322: }
2323:
2324: /**
2325: * Check if character array contains characters that are all white space.
2326: *
2327: * @param ch the character array
2328: * @param start the starting character index into the array to check from
2329: * @param length the number of characters to check
2330: * @return true if all characters are white space, false otherwise
2331: */
2332: public static boolean isWhiteSpace(final char[] ch, int start,
2333: final int length) {
2334: if (!XMLChar.isSpace(ch[start]))
2335: return false;
2336:
2337: final int end = start + length;
2338: start++;
2339: while (start < end && XMLChar.isSpace(ch[start++]))
2340: ;
2341: return start == end;
2342: }
2343:
2344: /**
2345: * Check if a String contains characters that are all white space.
2346: *
2347: * @param s the string
2348: * @return true if all characters are white space, false otherwise
2349: */
2350: public static boolean isWhiteSpace(String s) {
2351: if (!XMLChar.isSpace(s.charAt(0)))
2352: return false;
2353:
2354: final int end = s.length();
2355: int start = 1;
2356: while (start < end && XMLChar.isSpace(s.charAt(start++)))
2357: ;
2358: return start == end;
2359: }
2360: }
|