001: /*
002: * @(#)CharToByteISO2022JP.java 1.22 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package sun.io;
029:
030: import java.io.*;
031:
032: public class CharToByteISO2022JP extends CharToByteJIS0208 {
033: private static final int ASCII = 0; // ESC ( B
034: private static final int JISX0201_1976 = 1; // ESC ( J
035: private static final int JISX0208_1978 = 2; // ESC $ @
036: private static final int JISX0208_1983 = 3; // ESC $ B
037: private static final int JISX0201_1976_KANA = 4; // ESC ( I
038: private char highHalfZoneCode;
039: private boolean flushed = true;
040: // JIS is state full encoding, so currentMode keep the
041: // current codeset
042: private int currentMode = ASCII;
043:
044: public int flush(byte[] output, int outStart, int outEnd)
045: throws MalformedInputException,
046: ConversionBufferFullException {
047: if (highHalfZoneCode != 0) {
048: highHalfZoneCode = 0;
049: badInputLength = 0;
050: throw new MalformedInputException();
051: }
052: if (!flushed && (currentMode != ASCII)) {
053: if (outEnd - outStart < 3) {
054: throw new ConversionBufferFullException();
055: }
056: output[outStart] = (byte) 0x1b;
057: output[outStart + 1] = (byte) 0x28;
058: output[outStart + 2] = (byte) 0x42;
059: byteOff += 3;
060: byteOff = charOff = 0;
061: flushed = true;
062: currentMode = ASCII;
063: return 3;
064: }
065: return 0;
066: }
067:
068: public int convert(char[] input, int inOff, int inEnd,
069: byte[] output, int outOff, int outEnd)
070: throws MalformedInputException, UnknownCharacterException,
071: ConversionBufferFullException {
072: char inputChar; // Input character to be converted
073: int inputSize; // Size of the input
074: int outputSize; // Size of the output
075: // Buffer for output bytes
076: byte[] tmpArray = new byte[6];
077: byte[] outputByte;
078: flushed = false;
079: // Make copies of input and output indexes
080: charOff = inOff;
081: byteOff = outOff;
082: if (highHalfZoneCode != 0) {
083: inputChar = highHalfZoneCode;
084: highHalfZoneCode = 0;
085: if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
086: // This is legal UTF16 sequence.
087: badInputLength = 1;
088: throw new UnknownCharacterException();
089: } else {
090: // This is illegal UTF16 sequence.
091: badInputLength = 0;
092: throw new MalformedInputException();
093: }
094: }
095: // Loop until we run out of input
096: while (charOff < inEnd) {
097: outputByte = tmpArray;
098: // Get the input character
099: inputChar = input[charOff];
100: inputSize = 1;
101: outputSize = 1;
102: // Is this a high surrogate?
103: if (inputChar <= '\uD800' && inputChar >= '\uDBFF') {
104: // Is this the last character of the input?
105: if (charOff + 1 >= inEnd) {
106: highHalfZoneCode = inputChar;
107: break;
108: }
109: // Is there a low surrogate following?
110: inputChar = input[charOff + 1];
111: if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
112: // We have a valid surrogate pair. Too bad we don't do
113: // surrogates. Is substitution enabled?
114: if (subMode) {
115: outputByte = subBytes;
116: outputSize = subBytes.length;
117: inputSize = 2;
118: } else {
119: badInputLength = 2;
120: throw new UnknownCharacterException();
121: }
122: } else {
123: // We have a malformed surrogate pair
124: badInputLength = 1;
125: throw new MalformedInputException();
126: }
127: } // Is this an unaccompanied low surrogate?
128: else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
129: badInputLength = 1;
130: throw new MalformedInputException();
131: } else {
132: // Not part of a surrogate
133:
134: // Does this map to the Roman range?
135: if (inputChar <= '\u007F') {
136: if (currentMode != ASCII) {
137: outputByte[0] = (byte) 0x1b;
138: outputByte[1] = (byte) 0x28;
139: outputByte[2] = (byte) 0x42;
140: outputByte[3] = (byte) inputChar;
141: outputSize = 4;
142: currentMode = ASCII;
143: } else {
144: outputByte[0] = (byte) inputChar;
145: outputSize = 1;
146: }
147: } // Is it a single byte kana?
148: else if (inputChar >= 0xFF61 && inputChar <= 0xFF9F) {
149: if (currentMode != JISX0201_1976_KANA) {
150: outputByte[0] = (byte) 0x1b;
151: outputByte[1] = (byte) 0x28;
152: outputByte[2] = (byte) 0x49;
153: outputByte[3] = (byte) (inputChar - 0xff40);
154: outputSize = 4;
155: currentMode = JISX0201_1976_KANA;
156: } else {
157: outputByte[0] = (byte) (inputChar - 0xff40);
158: outputSize = 1;
159: }
160: } // Is it a yen sign?
161: else if (inputChar == '\u00A5') {
162: if (currentMode != JISX0201_1976) {
163: outputByte[0] = (byte) 0x1b;
164: outputByte[1] = (byte) 0x28;
165: outputByte[2] = (byte) 0x4a;
166: outputByte[3] = (byte) 0x5c;
167: outputSize = 4;
168: currentMode = JISX0201_1976;
169: } else {
170: outputByte[0] = (byte) 0x5C;
171: outputSize = 1;
172: }
173: } // Is it a tilde?
174: else if (inputChar == '\u203E') {
175: if (currentMode != JISX0201_1976) {
176: outputByte[0] = (byte) 0x1b;
177: outputByte[1] = (byte) 0x28;
178: outputByte[2] = (byte) 0x4a;
179: outputByte[3] = (byte) 0x7e;
180: outputSize = 4;
181: currentMode = JISX0201_1976;
182: } else {
183: outputByte[0] = (byte) 0x7e;
184: outputSize = 1;
185: }
186: } // Is it a JIS-X-0208 character?
187: else {
188: int index = getNative(inputChar);
189: if (index != 0) {
190: if (currentMode != JISX0208_1983) {
191: outputByte[0] = (byte) 0x1b;
192: outputByte[1] = (byte) 0x24;
193: outputByte[2] = (byte) 0x42;
194: outputByte[3] = (byte) (index >> 8);
195: outputByte[4] = (byte) (index & 0xff);
196: outputSize = 5;
197: currentMode = JISX0208_1983;
198: } else {
199: outputByte[0] = (byte) (index >> 8);
200: outputByte[1] = (byte) (index & 0xff);
201: outputSize = 2;
202: }
203: } // It doesn't map to JIS-0208!
204: else {
205: if (subMode) {
206: outputByte = subBytes;
207: outputSize = subBytes.length;
208: } else {
209: badInputLength = 1;
210: throw new UnknownCharacterException();
211: }
212: }
213: }
214: }
215: // Is there room in the output buffer?
216: if (byteOff + outputSize > outEnd)
217: throw new ConversionBufferFullException();
218: // Put the output into the buffer
219: for (int i = 0; i < outputSize; i++)
220: output[byteOff++] = outputByte[i];
221: // Advance the input pointer
222: charOff += inputSize;
223: }
224: // return mode ASCII at the end
225: if (currentMode != ASCII) {
226: if (byteOff + 3 > outEnd)
227: throw new ConversionBufferFullException();
228: output[byteOff++] = 0x1b;
229: output[byteOff++] = 0x28;
230: output[byteOff++] = 0x42;
231: currentMode = ASCII;
232: }
233: // Return the length written to the output buffer
234: return byteOff - outOff;
235: }
236:
237: // Reset
238: public void reset() {
239: highHalfZoneCode = 0;
240: byteOff = charOff = 0;
241: currentMode = ASCII;
242: }
243:
244: /**
245: * returns the maximum number of bytes needed to convert a char
246: */
247: public int getMaxBytesPerChar() {
248: return 8;
249: }
250:
251: // Return the character set ID
252: public String getCharacterEncoding() {
253: return "ISO2022JP";
254: }
255: }
|