001: /*
002: * @(#)CharToByteEUC.java 1.11 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027: package sun.io;
028:
029: public abstract class CharToByteEUC extends CharToByteConverter {
030: private char highHalfZoneCode;
031: private byte[] outputByte;
032: protected short index1[];
033: protected String index2;
034: protected String index2a;
035: protected String index2b;
036: protected String index2c;
037: protected int mask1;
038: protected int mask2;
039: protected int shift;
040: private byte[] workByte = new byte[4];
041:
042: /**
043: * flush out any residual data and reset the buffer state
044: */
045: public int flush(byte[] output, int outStart, int outEnd)
046: throws MalformedInputException,
047: ConversionBufferFullException {
048: if (highHalfZoneCode != 0) {
049: reset();
050: badInputLength = 0;
051: throw new MalformedInputException();
052: }
053: reset();
054: return 0;
055: }
056:
057: /**
058: * Character conversion
059: */
060: public int convert(char[] input, int inOff, int inEnd,
061: byte[] output, int outOff, int outEnd)
062: throws UnknownCharacterException, MalformedInputException,
063: ConversionBufferFullException {
064: char inputChar;
065: int inputSize;
066: byteOff = outOff;
067: charOff = inOff;
068: while (charOff < inEnd) {
069: outputByte = workByte;
070: int index;
071: int theBytes;
072: int spaceNeeded;
073: boolean allZeroes = true;
074: int i;
075: if (highHalfZoneCode == 0) {
076: inputChar = input[charOff];
077: inputSize = 1;
078: } else {
079: inputChar = highHalfZoneCode;
080: inputSize = 0;
081: highHalfZoneCode = 0;
082: }
083: // Is this a high surrogate?
084: if (inputChar >= '\ud800' && inputChar <= '\udbff') {
085: // Is this the last character of the input?
086: if (charOff + inputSize >= inEnd) {
087: highHalfZoneCode = inputChar;
088: charOff += inputSize;
089: break;
090: }
091: // Is there a low surrogate following?
092: inputChar = input[charOff + inputSize];
093: if (inputChar >= '\udc00' && inputChar <= '\udfff') {
094: // We have a valid surrogate pair. Too bad we don't do
095: // surrogates. Is substitution enabled?
096: if (subMode) {
097: outputByte = subBytes;
098: inputSize++;
099: } else {
100: badInputLength = 2;
101: throw new UnknownCharacterException();
102: }
103: } else {
104: // We have a malformed surrogate pair
105: badInputLength = 1;
106: throw new MalformedInputException();
107: }
108: } // Is this an unaccompanied low surrogate?
109: else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
110: badInputLength = 1;
111: throw new MalformedInputException();
112: } else {
113: String theChars;
114: char aChar;
115: // We have a valid character, get the bytes for it
116: index = index1[((inputChar & mask1) >> shift)]
117: + (inputChar & mask2);
118: if (index < 7500)
119: theChars = index2;
120: else if (index < 15000) {
121: index = index - 7500;
122: theChars = index2a;
123: } else if (index < 22500) {
124: index = index - 15000;
125: theChars = index2b;
126: } else {
127: index = index - 22500;
128: theChars = index2c;
129: }
130: aChar = theChars.charAt(2 * index);
131: outputByte[0] = (byte) ((aChar & 0xff00) >> 8);
132: outputByte[1] = (byte) (aChar & 0x00ff);
133: aChar = theChars.charAt(2 * index + 1);
134: outputByte[2] = (byte) ((aChar & 0xff00) >> 8);
135: outputByte[3] = (byte) (aChar & 0x00ff);
136: }
137: // if there was no mapping - look for substitution characters
138:
139: for (i = 0; i < outputByte.length; i++) {
140: if (outputByte[i] != 0x00) {
141: allZeroes = false;
142: break;
143: }
144: }
145: if (allZeroes && inputChar != '\u0000') {
146: if (subMode) {
147: outputByte = subBytes;
148: } else {
149: badInputLength = 1;
150: throw new UnknownCharacterException();
151: }
152: }
153: int oindex = 0;
154: for (spaceNeeded = outputByte.length; spaceNeeded > 1; spaceNeeded--) {
155: if (outputByte[oindex++] != 0x00)
156: break;
157: }
158: if (byteOff + spaceNeeded > outEnd)
159: throw new ConversionBufferFullException();
160: for (i = outputByte.length - spaceNeeded; i < outputByte.length; i++) {
161: output[byteOff++] = outputByte[i];
162: }
163: charOff += inputSize;
164: }
165: return byteOff - outOff;
166: }
167:
168: /**
169: * Resets converter to its initial state.
170: */
171: public void reset() {
172: charOff = byteOff = 0;
173: highHalfZoneCode = 0;
174: }
175:
176: /**
177: * Returns the maximum number of bytes needed to convert a char.
178: */
179: public int getMaxBytesPerChar() {
180: return 2;
181: }
182:
183: /**
184: * Returns true if the given character can be converted to the
185: * target character encoding.
186: */
187: public boolean canConvert(char ch) {
188: int index;
189: String theChars;
190: index = index1[((ch & mask1) >> shift)] + (ch & mask2);
191: if (index < 7500)
192: theChars = index2;
193: else if (index < 15000) {
194: index = index - 7500;
195: theChars = index2a;
196: } else if (index < 22500) {
197: index = index - 15000;
198: theChars = index2b;
199: } else {
200: index = index - 22500;
201: theChars = index2c;
202: }
203: if (theChars.charAt(2 * index) != '\u0000'
204: || theChars.charAt(2 * index + 1) != '\u0000')
205: return (true);
206: // only return true if input char was unicode null - all others are
207: // undefined
208: return (ch == '\u0000');
209: }
210: }
|