001: /*
002: * @(#)ByteToCharConverter.java 1.42 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package sun.io;
029:
030: import java.io.*;
031:
032: /**
033: * An abstract base class for subclasses which convert character data
034: * in an external encoding into Unicode characters.
035: *
036: * @author Asmus Freytag
037: * @author Lloyd Honomichl
038: */
039: public abstract class ByteToCharConverter {
040:
041: /*
042: * Substitution mode flag.
043: */
044: protected boolean subMode = true;
045:
046: /*
047: * Characters to use for automatic substitution.
048: */
049: protected char[] subChars = { '\uFFFD' };
050:
051: /*
052: * Offset of next character to be output
053: */
054: protected int charOff;
055:
056: /*
057: * Offset of next byte to be converted
058: */
059: protected int byteOff;
060:
061: /*
062: * Length of bad input that caused a MalformedInputException.
063: */
064: protected int badInputLength;
065:
066: /**
067: * Create an instance of the default ByteToCharConverter subclass.
068: */
069: public static ByteToCharConverter getDefault() {
070: Object cvt;
071: cvt = Converters.newDefaultConverter(Converters.BYTE_TO_CHAR);
072: return (ByteToCharConverter) cvt;
073: }
074:
075: /**
076: * Returns appropriate ByteToCharConverter subclass instance.
077: * @param string represents encoding
078: */
079: public static ByteToCharConverter getConverter(String encoding)
080: throws UnsupportedEncodingException {
081: Object cvt;
082: cvt = Converters
083: .newConverter(Converters.BYTE_TO_CHAR, encoding);
084: return (ByteToCharConverter) cvt;
085: }
086:
087: /**
088: * Returns the character set id for the conversion
089: */
090: public abstract String getCharacterEncoding();
091:
092: /**
093: * Converts an array of bytes containing characters in an external
094: * encoding into an array of Unicode characters. This method allows
095: * a buffer by buffer conversion of a data stream. The state of the
096: * conversion is saved between calls to convert. Among other things,
097: * this means multibyte input sequences can be split between calls.
098: * If a call to convert results in an exception, the conversion may be
099: * continued by calling convert again with suitably modified parameters.
100: * All conversions should be finished with a call to the flush method.
101: *
102: * @return the number of bytes written to output.
103: * @param input byte array containing text to be converted.
104: * @param inStart begin conversion at this offset in input array.
105: * @param inEnd stop conversion at this offset in input array (exclusive).
106: * @param output character array to receive conversion result.
107: * @param outStart start writing to output array at this offset.
108: * @param outEnd stop writing to output array at this offset (exclusive).
109: * @exception MalformedInputException if the input buffer contains any
110: * sequence of bytes that is illegal for the input character set.
111: * @exception UnknownCharacterException for any character that
112: * that cannot be converted to Unicode. Thrown only when converter
113: * is not in substitution mode.
114: * @exception ConversionBufferFullException if output array is filled prior
115: * to converting all the input.
116: */
117: public abstract int convert(byte[] input, int inStart, int inEnd,
118: char[] output, int outStart, int outEnd)
119: throws MalformedInputException, UnknownCharacterException,
120: ConversionBufferFullException;
121:
122: /**
123: * Converts an array of bytes containing characters in an external
124: * encoding into an array of Unicode characters. Unlike convert,
125: * this method does not do incremental conversion. It assumes that
126: * the given input array contains all the characters to be
127: * converted. The state of the converter is reset at the beginning
128: * of this method and is left in the reset state on successful
129: * termination. The converter is not reset if an exception is
130: * thrown. This allows the caller to determine where the bad input
131: * was encountered by calling nextByteIndex.
132: * <p>
133: * This method uses substitution mode when performing the
134: * conversion. The method setSubstitutionChars may be used to
135: * determine what characters are substituted. Even though substitution
136: * mode is used, the state of the converter's substitution mode is
137: * not changed at the end of this method.
138: *
139: * @return an array of chars containing the converted characters.
140: * @param input array containing Unicode characters to be converted.
141: * @exception MalformedInputException if the input buffer contains any
142: * sequence of chars that is illegal in the input character encoding.
143: * After this exception is thrown,
144: * the method nextByteIndex can be called to obtain the index of the
145: * first invalid input byte and getBadInputLength can be called
146: * to determine the length of the invalid input.
147: *
148: * @see #nextByteIndex
149: * @see #setSubstitutionMode
150: * @see sun.io.CharToByteConverter#setSubstitutionBytes(byte[])
151: * @see #getBadInputLength
152: */
153: public char[] convertAll(byte input[])
154: throws MalformedInputException {
155: reset();
156: boolean savedSubMode = subMode;
157: subMode = true;
158:
159: char[] output = new char[getMaxCharsPerByte() * input.length];
160:
161: try {
162: int outputLength = convert(input, 0, input.length, output,
163: 0, output.length);
164: outputLength += flush(output, outputLength, output.length);
165:
166: char[] returnedOutput = new char[outputLength];
167: System
168: .arraycopy(output, 0, returnedOutput, 0,
169: outputLength);
170: return returnedOutput;
171: } catch (ConversionBufferFullException e) {
172: //Not supposed to happen. If it does, getMaxCharsPerByte() lied.
173: throw new InternalError(
174: "this.getMaxCharsBerByte returned bad value");
175: } catch (UnknownCharacterException e) {
176: // Not supposed to happen since we're in substitution mode.
177: throw new InternalError();
178: } finally {
179: subMode = savedSubMode;
180: }
181: }
182:
183: /**
184: * Writes any remaining output to the output buffer and resets the
185: * converter to its initial state.
186: *
187: * @param output char array to receive flushed output.
188: * @param outStart start writing to output array at this offset.
189: * @param outEnd stop writing to output array at this offset (exclusive).
190: * @exception MalformedInputException if the output to be flushed contained
191: * a partial or invalid multibyte character sequence. flush will
192: * write what it can to the output buffer and reset the converter before
193: * throwing this exception. An additional call to flush is not required.
194: * @exception ConversionBufferFullException if output array is filled
195: * before all the output can be flushed. flush will write what it can
196: * to the output buffer and remember its state. An additional call to
197: * flush with a new output buffer will conclude the operation.
198: */
199: public abstract int flush(char[] output, int outStart, int outEnd)
200: throws MalformedInputException,
201: ConversionBufferFullException;
202:
203: /**
204: * Resets converter to its initial state.
205: */
206: public abstract void reset();
207:
208: /**
209: * Returns the maximum number of characters needed to convert a byte. Useful
210: * for calculating the maximum output buffer size needed for a particular
211: * input buffer.
212: */
213: public int getMaxCharsPerByte() {
214: // Until UTF-16, this will do for every encoding
215: return 1;
216: }
217:
218: /**
219: * Returns the length, in bytes, of the input which caused a
220: * MalformedInputException. Always refers to the last
221: * MalformedInputException thrown by the converter. If none have
222: * ever been thrown, returns 0.
223: */
224: public int getBadInputLength() {
225: return badInputLength;
226: }
227:
228: /**
229: * Returns the index of the character just past the last character
230: * written by the previous call to convert.
231: */
232: public int nextCharIndex() {
233: return charOff;
234: }
235:
236: /**
237: * Returns the index of the byte just past the last byte successfully
238: * converted by the previous call to convert.
239: */
240: public int nextByteIndex() {
241: return byteOff;
242: }
243:
244: /**
245: * Sets converter into substitution mode. In substitution mode,
246: * the converter will replace untranslatable characters in the source
247: * encoding with the substitution character set by setSubstitionChars.
248: * When not in substitution mode, the converter will throw an
249: * UnknownCharacterException when it encounters untranslatable input.
250: *
251: * @param doSub if true, enable substitution mode.
252: * @see #setSubstitutionChars
253: */
254: public void setSubstitutionMode(boolean doSub) {
255: subMode = doSub;
256: }
257:
258: /**
259: * Sets the substitution characters to use when the converter is in
260: * substitution mode. The given chars must not be
261: * longer than the value returned by getMaxCharsPerByte for this
262: * converter.
263: *
264: * @param newSubBytes the substitution bytes
265: * @exception IllegalArgumentException if given byte array is longer than
266: * the value returned by the method getMaxBytesPerChar.
267: * @see #setSubstitutionMode
268: * @see #getMaxBytesPerChar
269: */
270: /**
271: * sets the substitution character to use
272: * @param c the substitution character
273: */
274: public void setSubstitutionChars(char[] c)
275: throws IllegalArgumentException {
276: if (c.length > getMaxCharsPerByte()) {
277: throw new IllegalArgumentException();
278: }
279:
280: subChars = new char[c.length];
281: System.arraycopy(c, 0, subChars, 0, c.length);
282: }
283:
284: /**
285: * returns a string representation of the character conversion
286: */
287: public String toString() {
288: return "ByteToCharConverter: " + getCharacterEncoding();
289: }
290: }
|