001: /*
002: * @(#)ByteToCharUTF8.java 1.25 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027: package sun.io;
028:
029: /**
030: * UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter
031: *
032: * see CharToByteUTF8.java about UTF-8 format
033: */
034:
035: public class ByteToCharUTF8 extends ByteToCharConverter {
036:
037: private int savedSize;
038: private byte[] savedBytes;
039:
040: public ByteToCharUTF8() {
041: super ();
042: savedSize = 0;
043: savedBytes = new byte[5];
044: }
045:
046: public int flush(char[] output, int outStart, int outEnd)
047: throws MalformedInputException {
048: if (savedSize != 0) {
049: savedSize = 0;
050: badInputLength = 0;
051: throw new MalformedInputException();
052: }
053: byteOff = charOff = 0;
054: return 0;
055: }
056:
057: /**
058: * Character converson
059: */
060: public int convert(byte[] input, int inOff, int inEnd,
061: char[] output, int outOff, int outEnd)
062: throws MalformedInputException,
063: ConversionBufferFullException {
064: int byte1, byte2, byte3, byte4;
065: char[] outputChar = new char[2];
066: int outputSize;
067: int byteOffAdjustment = 0;
068:
069: if (savedSize != 0) {
070: byte[] newBuf;
071: newBuf = new byte[inEnd - inOff + savedSize];
072: for (int i = 0; i < savedSize; i++) {
073: newBuf[i] = savedBytes[i];
074: }
075: System.arraycopy(input, inOff, newBuf, savedSize, inEnd
076: - inOff);
077: input = newBuf;
078: inOff = 0;
079: inEnd = newBuf.length;
080: byteOffAdjustment = -savedSize;
081: savedSize = 0;
082: }
083:
084: charOff = outOff;
085: byteOff = inOff;
086: int startByteOff;
087:
088: while (byteOff < inEnd) {
089:
090: startByteOff = byteOff;
091: byte1 = input[byteOff++] & 0xff;
092:
093: if ((byte1 & 0x80) == 0) {
094: outputChar[0] = (char) byte1;
095: outputSize = 1;
096: } else if ((byte1 & 0xe0) == 0xc0) {
097: if (byteOff >= inEnd) {
098: savedSize = 1;
099: savedBytes[0] = (byte) byte1;
100: break;
101: }
102: byte2 = input[byteOff++] & 0xff;
103: if ((byte2 & 0xc0) != 0x80) {
104: badInputLength = 2;
105: byteOff += byteOffAdjustment;
106: throw new MalformedInputException();
107: }
108: outputChar[0] = (char) (((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
109: outputSize = 1;
110: } else if ((byte1 & 0xf0) == 0xe0) {
111: if (byteOff + 1 >= inEnd) {
112: savedBytes[0] = (byte) byte1;
113: if (byteOff >= inEnd) {
114: savedSize = 1;
115: } else {
116: savedSize = 2;
117: savedBytes[1] = (byte) input[byteOff++];
118: }
119: break;
120: }
121: byte2 = input[byteOff++] & 0xff;
122: byte3 = input[byteOff++] & 0xff;
123: if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
124: badInputLength = 3;
125: byteOff += byteOffAdjustment;
126: throw new MalformedInputException();
127: }
128: outputChar[0] = (char) (((byte1 & 0x0f) << 12)
129: | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f));
130: outputSize = 1;
131: } else if ((byte1 & 0xf8) == 0xf0) {
132: if (byteOff + 2 >= inEnd) {
133: savedBytes[0] = (byte) byte1;
134: if (byteOff >= inEnd) {
135: savedSize = 1;
136: } else if (byteOff + 1 >= inEnd) {
137: savedSize = 2;
138: savedBytes[1] = input[byteOff++];
139: } else {
140: savedSize = 3;
141: savedBytes[1] = input[byteOff++];
142: savedBytes[2] = input[byteOff++];
143: }
144: break;
145: }
146: byte2 = input[byteOff++] & 0xff;
147: byte3 = input[byteOff++] & 0xff;
148: byte4 = input[byteOff++] & 0xff;
149: if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80
150: || (byte4 & 0xc0) != 0x80) {
151: badInputLength = 4;
152: byteOff += byteOffAdjustment;
153: throw new MalformedInputException();
154: }
155: // this byte sequence is UTF16 character
156: int ucs4 = (0x07 & byte1) << 18 | (0x3f & byte2) << 12
157: | (0x3f & byte3) << 6 | (0x3f & byte4);
158: outputChar[0] = (char) ((ucs4 - 0x10000) / 0x400 + 0xd800);
159: outputChar[1] = (char) ((ucs4 - 0x10000) % 0x400 + 0xdc00);
160: outputSize = 2;
161: } else {
162: badInputLength = 1;
163: byteOff += byteOffAdjustment;
164: throw new MalformedInputException();
165: }
166:
167: if (charOff + outputSize > outEnd) {
168: byteOff = startByteOff;
169: byteOff += byteOffAdjustment;
170: throw new ConversionBufferFullException();
171: }
172:
173: for (int i = 0; i < outputSize; i++) {
174: output[charOff + i] = outputChar[i];
175: }
176: charOff += outputSize;
177: }
178:
179: byteOff += byteOffAdjustment;
180: return charOff - outOff;
181: }
182:
183: /*
184: * Return the character set id
185: */
186: public String getCharacterEncoding() {
187: return "UTF8";
188: }
189:
190: /*
191: * Reset after finding bad input
192: */
193: public void reset() {
194: byteOff = charOff = 0;
195: savedSize = 0;
196: }
197: }
|