001: /*
002: * @(#)CharToByteSingleByte.java 1.19 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package sun.io;
029:
/**
 * A table-driven conversion from char to byte for single-byte
 * character sets. Tables will reside in the class CharToByteYYYYY,
 * where YYYYY is a unique character set identifier.
 *
 * TBD: Tables are of the form...
 *
 * @author Lloyd Honomichl
 * @author Asmus Freytag
 * @version 8/28/96
 */
042:
043: public abstract class CharToByteSingleByte extends CharToByteConverter {
044: /*
045: * 1st level index, provided by subclass
046: */
047: protected short index1[];
048: /*
049: * 2nd level index, provided by subclass
050: */
051: protected String index2;
052: /*
053: * Mask to isolate bits for 1st level index, from subclass
054: */
055: protected int mask1;
056: /*
057: * Mask to isolate bits for 2nd level index, from subclass
058: */
059: protected int mask2;
060: /*
061: * Shift to isolate bits for 1st level index, from subclass
062: */
063: protected int shift;
064: private char highHalfZoneCode;
065:
066: public int flush(byte[] output, int outStart, int outEnd)
067: throws MalformedInputException {
068: if (highHalfZoneCode != 0) {
069: highHalfZoneCode = 0;
070: badInputLength = 0;
071: throw new MalformedInputException();
072: }
073: byteOff = charOff = 0;
074: return 0;
075: }
076:
077: /**
078: * Converts characters to sequences of bytes.
079: * Conversions that result in Exceptions can be restarted by calling
080: * convert again, with appropriately modified parameters.
081: * @return the characters written to output.
082: * @param input char array containing text in Unicode
083: * @param inStart offset in input array
084: * @param inEnd offset of last byte to be converted
085: * @param output byte array to receive conversion result
086: * @param outStart starting offset
087: * @param outEnd offset of last byte to be written to
088: * @throw MalformedInputException for any sequence of chars that is
089: * illegal in Unicode (principally unpaired surrogates
090: * and \uFFFF or \uFFFE), including any partial surrogate pair
091: * which occurs at the end of an input buffer.
092: * @throw UnsupportedCharacterException for any character that
093: * that cannot be converted to the external character set.
094: */
095: public int convert(char[] input, int inOff, int inEnd,
096: byte[] output, int outOff, int outEnd)
097: throws MalformedInputException, UnknownCharacterException,
098: ConversionBufferFullException {
099: char inputChar; // Input character to be converted
100: byte[] outputByte; // Output byte written to output
101: int inputSize; // Size of input
102: int outputSize; // Size of output
103: byte[] tmpArray = new byte[1];
104: // Record beginning offsets
105: charOff = inOff;
106: byteOff = outOff;
107: if (highHalfZoneCode != 0) {
108: inputChar = highHalfZoneCode;
109: highHalfZoneCode = 0;
110: if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
111: // This is legal UTF16 sequence.
112: badInputLength = 1;
113: throw new UnknownCharacterException();
114: } else {
115: // This is illegal UTF16 sequence.
116: badInputLength = 0;
117: throw new MalformedInputException();
118: }
119: }
120: // Loop until we hit the end of the input
121: while (charOff < inEnd) {
122: outputByte = tmpArray;
123: // Get the input character
124: inputChar = input[charOff];
125: // Default output size
126: outputSize = 1;
127: // Assume this is a simple character
128: inputSize = 1;
129: // Is this a high surrogate?
130: if (inputChar >= '\uD800' && inputChar <= '\uDBFF') {
131: // Is this the last character in the input?
132: if (charOff + 1 >= inEnd) {
133: highHalfZoneCode = inputChar;
134: break;
135: }
136: // Is there a low surrogate following?
137: inputChar = input[charOff + 1];
138: if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
139: // We have a valid surrogate pair. Too bad we don't map
140: // surrogates. Is substitution enabled?
141: if (subMode) {
142: outputByte = subBytes;
143: outputSize = subBytes.length;
144: inputSize = 2;
145: } else {
146: badInputLength = 2;
147: throw new UnknownCharacterException();
148: }
149: } else {
150: // We have a malformed surrogate pair
151: badInputLength = 1;
152: throw new MalformedInputException();
153: }
154: } // Is this an unaccompanied low surrogate?
155: else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
156: badInputLength = 1;
157: throw new MalformedInputException();
158: } // Not part of a surrogate, so look it up
159: else {
160: // Get output using two level lookup
161: outputByte[0] = getNative(inputChar);
162: // Might this character be unmappable?
163: if (outputByte[0] == 0) {
164: // If outputByte is zero because the input was zero
165: // then this character is actually mappable
166: if (input[charOff] != '\u0000') {
167: // We have an unmappable character
168: // Is substitution enabled?
169: if (subMode) {
170: outputByte = subBytes;
171: outputSize = subBytes.length;
172: } else {
173: badInputLength = 1;
174: throw new UnknownCharacterException();
175: }
176: }
177: }
178: }
179: // If we don't have room for the output, throw an exception
180: if (byteOff + outputSize > outEnd)
181: throw new ConversionBufferFullException();
182: // Put the byte in the output buffer
183: for (int i = 0; i < outputSize; i++) {
184: output[byteOff++] = outputByte[i];
185: }
186: charOff += inputSize;
187: }
188: // Return the length written to the output buffer
189: return byteOff - outOff;
190: }
191:
192: /**
193: * the maximum number of bytes needed to hold a converted char
194: * @returns the maximum number of bytes needed for a converted char
195: */
196: public int getMaxBytesPerChar() {
197: return 1;
198: }
199:
200: public byte getNative(char inputChar) {
201: return (byte) index2
202: .charAt(index1[(inputChar & mask1) >> shift]
203: + (inputChar & mask2));
204: }
205:
206: /**
207: * Resets the converter.
208: * Call this method to reset the converter to its initial state
209: */
210: public void reset() {
211: byteOff = charOff = 0;
212: highHalfZoneCode = 0;
213: }
214:
215: /**
216: * Return whether a character is mappable or not
217: * @return true if a character is mappable
218: */
219: public boolean canConvert(char ch) {
220: // Look it up in the table
221: if (index2.charAt(index1[((ch & mask1) >> shift)]
222: + (ch & mask2)) != '\u0000')
223: return true;
224: // Nulls are always mappable
225: return (ch == '\u0000');
226: }
227: }
|