001: /*
002: * @(#)ByteToCharUnicode.java 1.17 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package sun.io;
029:
030: import java.io.*;
031:
032: /**
033: * Convert byte arrays containing Unicode characters into arrays of actual
034: * Unicode characters. This class may be used directly, in which case it
035: * expects the input byte array to begin with a byte-order mark, or it may be
036: * subclassed in order to preset the byte order and mark behavior.
037: *
038: * <p> Whether or not a mark is expected, if a mark that does not match the
039: * established byte order is later discovered then a
040: * <tt>MalformedInputException</tt> will be thrown by the <tt>convert</tt>
041: * method. If a correct mark is seen later in the input stream, it is passed
042: * through as a character.
043: *
044: * @see ByteToCharUnicodeLittle
045: * @see ByteToCharUnicodeLittleUnmarked
046: * @see ByteToCharUnicodeBig
047: * @see ByteToCharUnicodeBigUnmarked
048: *
049: * @version 1.10, 00/02/02
050: * @author Mark Reinhold
051: */
052:
053: public class ByteToCharUnicode extends ByteToCharConverter {
054:
055: static final char BYTE_ORDER_MARK = (char) 0xfeff;
056: static final char REVERSED_MARK = (char) 0xfffe;
057:
058: static final int AUTO = 0;
059: static final int BIG = 1;
060: static final int LITTLE = 2;
061:
062: int originalByteOrder; /* Byte order specified at creation */
063: int byteOrder; /* Byte order in use */
064: boolean usesMark; /* Look for a mark and interpret it */
065:
066: /**
067: * Creates a Unicode byte-to-char converter that expects the first pair of
068: * input bytes to be a byte-order mark, which will be interpreted and
069: * discarded. If the first pair of bytes is not such a mark then a
070: * <tt>MalformedInputException</tt> will be thrown by the convert method.
071: */
072: public ByteToCharUnicode() {
073: originalByteOrder = byteOrder = AUTO;
074: usesMark = true;
075: }
076:
077: /**
078: * Creates a Unicode byte-to-char converter that uses the given byte order
079: * and may or may not insist upon an initial byte-order mark.
080: */
081: protected ByteToCharUnicode(int bo, boolean m) {
082: originalByteOrder = byteOrder = bo;
083: usesMark = m;
084: }
085:
086: public String getCharacterEncoding() {
087: switch (originalByteOrder) {
088: case BIG:
089: return usesMark ? "UnicodeBig" : "UnicodeBigUnmarked";
090: case LITTLE:
091: return usesMark ? "UnicodeLittle" : "UnicodeLittleUnmarked";
092: default:
093: return "Unicode";
094: }
095: }
096:
097: boolean started = false;
098: int leftOverByte;
099: boolean leftOver = false;
100:
101: public int convert(byte[] in, int inOff, int inEnd, char[] out,
102: int outOff, int outEnd)
103: throws ConversionBufferFullException,
104: MalformedInputException {
105: byteOff = inOff;
106: charOff = outOff;
107:
108: if (inOff >= inEnd)
109: return 0;
110:
111: int b1, b2;
112: int bc = 0;
113: int inI = inOff, outI = outOff;
114:
115: if (leftOver) {
116: b1 = leftOverByte & 0xff;
117: leftOver = false;
118: } else {
119: b1 = in[inI++] & 0xff;
120: }
121: bc = 1;
122:
123: if (usesMark && !started) { /* Read initial byte-order mark */
124: if (inI < inEnd) {
125: b2 = in[inI++] & 0xff;
126: bc = 2;
127:
128: char c = (char) ((b1 << 8) | b2);
129: int bo = AUTO;
130:
131: if (c == BYTE_ORDER_MARK)
132: bo = BIG;
133: else if (c == REVERSED_MARK)
134: bo = LITTLE;
135:
136: if (byteOrder == AUTO) {
137: if (bo == AUTO) {
138: badInputLength = bc;
139: throw new MalformedInputException(
140: "Missing byte-order mark");
141: }
142: byteOrder = bo;
143: if (inI < inEnd) {
144: b1 = in[inI++] & 0xff;
145: bc = 1;
146: }
147: } else if (bo == AUTO) {
148: inI--;
149: bc = 1;
150: } else if (byteOrder == bo) {
151: if (inI < inEnd) {
152: b1 = in[inI++] & 0xff;
153: bc = 1;
154: }
155: } else {
156: badInputLength = bc;
157: throw new MalformedInputException(
158: "Incorrect byte-order mark");
159: }
160:
161: started = true;
162: }
163: }
164:
165: /* Loop invariant: (b1 contains the next input byte) && (bc == 1) */
166: while (inI < inEnd) {
167: b2 = in[inI++] & 0xff;
168: bc = 2;
169:
170: char c;
171: if (byteOrder == BIG)
172: c = (char) ((b1 << 8) | b2);
173: else
174: c = (char) ((b2 << 8) | b1);
175:
176: if (c == REVERSED_MARK)
177: throw new MalformedInputException(
178: "Reversed byte-order mark");
179:
180: if (outI >= outEnd)
181: throw new ConversionBufferFullException();
182: out[outI++] = c;
183: byteOff = inI;
184: charOff = outI;
185:
186: if (inI < inEnd) {
187: b1 = in[inI++] & 0xff;
188: bc = 1;
189: }
190: }
191:
192: if (bc == 1) {
193: leftOverByte = b1;
194: byteOff = inI;
195: leftOver = true;
196: }
197:
198: return outI - outOff;
199: }
200:
201: public void reset() {
202: leftOver = false;
203: byteOff = charOff = 0;
204: started = false;
205: byteOrder = originalByteOrder;
206: }
207:
208: public int flush(char buf[], int off, int len)
209: throws MalformedInputException {
210: if (leftOver) {
211: reset();
212: throw new MalformedInputException();
213: }
214: byteOff = charOff = 0;
215: return 0;
216: }
217:
218: }
|