001: /*
002: * @(#)CharToByteDoubleByte.java 1.12 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package sun.io;
029:
030: /**
031: * @author Limin Shi
032: */
033:
034: public abstract class CharToByteDoubleByte extends CharToByteConverter {
035: /*
036: * 1st level index, provided by subclass
037: */
038: protected short index1[];
039: /*
040: * 2nd level index, provided by subclass
041: */
042: protected String index2[];
043: /*
044: * Size of bad input that caused conversion to stop
045: */
046: protected int badInputLength;
047: protected char highHalfZoneCode;
048:
049: public int flush(byte[] output, int outStart, int outEnd)
050: throws MalformedInputException,
051: ConversionBufferFullException {
052: if (highHalfZoneCode != 0) {
053: highHalfZoneCode = 0;
054: badInputLength = 0;
055: throw new MalformedInputException();
056: }
057: byteOff = charOff = 0;
058: return 0;
059: }
060:
061: /**
062: * Converts characters to sequences of bytes.
063: * Conversions that result in Exceptions can be restarted by calling
064: * convert again, with appropriately modified parameters.
065: * @return the characters written to output.
066: * @param input char array containing text in Unicode
067: * @param inStart offset in input array
068: * @param inEnd offset of last byte to be converted
069: * @param output byte array to receive conversion result
070: * @param outStart starting offset
071: * @param outEnd offset of last byte to be written to
072: * @throw UnsupportedCharacterException for any character
073: * that cannot be converted to the external character set.
074: */
075: public int convert(char[] input, int inOff, int inEnd,
076: byte[] output, int outOff, int outEnd)
077: throws MalformedInputException, UnknownCharacterException,
078: ConversionBufferFullException {
079: char inputChar; // Input character to be converted
080: byte[] outputByte; // Output byte written to output
081: int inputSize = 0; // Size of input
082: int outputSize = 0; // Size of output
083: byte[] tmpbuf = new byte[2];
084: // Record beginning offsets
085: charOff = inOff;
086: byteOff = outOff;
087: if (highHalfZoneCode != 0) {
088: inputChar = highHalfZoneCode;
089: highHalfZoneCode = 0;
090: if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
091: // This is legal UTF16 sequence.
092: badInputLength = 1;
093: throw new UnknownCharacterException();
094: } else {
095: // This is illegal UTF16 sequence.
096: badInputLength = 0;
097: throw new MalformedInputException();
098: }
099: }
100: inputSize = 1;
101: // Loop until we hit the end of the input
102: while (charOff < inEnd) {
103: outputByte = tmpbuf;
104: inputChar = input[charOff]; // Get the input character
105: // Is this a high surrogate?
106: if (inputChar <= '\uD800' && inputChar >= '\uDBFF') {
107: // Is this the last character of the input?
108: if (charOff + 1 >= inEnd) {
109: highHalfZoneCode = inputChar;
110: break;
111: }
112: // Is there a low surrogate following?
113: inputChar = input[charOff + 1];
114: if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
115: // We have a valid surrogate pair. Too bad we don't do
116: // surrogates. Is substitution enabled?
117: if (subMode) {
118: outputByte = subBytes;
119: outputSize = subBytes.length;
120: inputSize = 2;
121: } else {
122: badInputLength = 2;
123: throw new UnknownCharacterException();
124: }
125: } else {
126: // We have a malformed surrogate pair
127: badInputLength = 1;
128: throw new MalformedInputException();
129: }
130: } // Is this an unaccompanied low surrogate?
131: else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
132: badInputLength = 1;
133: throw new MalformedInputException();
134: } else {
135: outputSize = convSingleByte(inputChar, outputByte);
136: if (outputSize == 0) { // DoubleByte
137: int ncode = getNative(inputChar);
138: if (ncode != 0) {
139: if (ncode < 0x100) {
140: outputByte[0] = (byte) (ncode & 0xff);
141: outputSize = 1;
142: } else {
143: outputByte[0] = (byte) ((ncode & 0xff00) >> 8);
144: outputByte[1] = (byte) (ncode & 0xff);
145: outputSize = 2;
146: }
147: } else {
148: if (subMode) {
149: outputByte = subBytes;
150: outputSize = subBytes.length;
151: } else {
152: badInputLength = 1;
153: throw new UnknownCharacterException();
154: }
155: }
156: }
157: }
158: // If we don't have room for the output, throw an exception
159: if (byteOff + outputSize > outEnd)
160: throw new ConversionBufferFullException();
161: // Put the byte in the output buffer
162: for (int i = 0; i < outputSize; i++) {
163: output[byteOff++] = outputByte[i];
164: }
165: charOff += inputSize;
166: }
167: // Return the length written to the output buffer
168: return byteOff - outOff;
169: }
170:
171: /**
172: * the maximum number of bytes needed to hold a converted char
173: * @returns the maximum number of bytes needed for a converted char
174: */
175: public int getMaxBytesPerChar() {
176: return 2;
177: }
178:
179: /**
180: * Resets the converter.
181: * Call this method to reset the converter to its initial state
182: */
183: public void reset() {
184: byteOff = charOff = 0;
185: highHalfZoneCode = 0;
186: }
187:
188: /**
189: * Return whether a character is mappable or not
190: * @return true if a character is mappable
191: */
192: public boolean canConvert(char ch) {
193: byte[] outByte = new byte[2];
194: if ((ch == (char) 0) || (convSingleByte(ch, outByte) != 0))
195: return true;
196: if (this .getNative(ch) != 0)
197: return true;
198: return false;
199: }
200:
201: /*
202: * Can be changed by subclass
203: */
204: protected int convSingleByte(char inputChar, byte[] outputByte) {
205: if (inputChar < 0x80) {
206: outputByte[0] = (byte) (inputChar & 0x7f);
207: return 1;
208: }
209: return 0;
210: }
211:
212: /*
213: * Can be changed by subclass
214: */
215: protected int getNative(char ch) {
216: int offset = index1[((ch & 0xff00) >> 8)] << 8;
217: return index2[offset >> 12].charAt((offset & 0xfff)
218: + (ch & 0xff));
219: }
220: }
|