001: /*
002: * %W% %E%
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package sun.io;
029:
030: /**
031: * @author Limin Shi
032: */
033:
034: public class CharToByteEUC_JP extends CharToByteJIS0208 {
035: CharToByteJIS0201 cbJIS0201 = new CharToByteJIS0201();
036: CharToByteJIS0212 cbJIS0212 = new CharToByteJIS0212();
037:
038: public String getCharacterEncoding() {
039: return "EUC_JP";
040: }
041:
042: protected int convSingleByte(char inputChar, byte[] outputByte) {
043: byte b;
044: if (inputChar == 0) {
045: outputByte[0] = (byte) 0;
046: return 1;
047: }
048: if ((b = cbJIS0201.getNative(inputChar)) == 0)
049: return 0;
050: if (b > 0 && b < 128) {
051: outputByte[0] = b;
052: return 1;
053: }
054: outputByte[0] = (byte) 0x8E;
055: outputByte[1] = b;
056: return 2;
057: }
058:
059: protected int getNative(char ch) {
060: int offset = index1[((ch & 0xff00) >> 8)] << 8;
061: int r = index2[offset >> 12].charAt((offset & 0xfff)
062: + (ch & 0xff));
063: if (r != 0)
064: return r + 0x8080;
065: r = cbJIS0212.getNative(ch);
066: if (r == 0)
067: return r;
068: return r + 0x8F8080;
069: }
070:
071: /**
072: * Converts characters to sequences of bytes.
073: * Conversions that result in Exceptions can be restarted by calling
074: * convert again, with appropriately modified parameters.
075: * @return the characters written to output.
076: * @param input char array containing text in Unicode
077: * @param inStart offset in input array
078: * @param inEnd offset of last byte to be converted
079: * @param output byte array to receive conversion result
080: * @param outStart starting offset
081: * @param outEnd offset of last byte to be written to
082: * @throw UnsupportedCharacterException for any character
083: * that cannot be converted to the external character set.
084: */
085: public int convert(char[] input, int inOff, int inEnd,
086: byte[] output, int outOff, int outEnd)
087: throws MalformedInputException, UnknownCharacterException,
088: ConversionBufferFullException {
089: char inputChar; // Input character to be converted
090: byte[] outputByte; // Output byte written to output
091: int inputSize = 0; // Size of input
092: int outputSize = 0; // Size of output
093: byte[] tmpbuf = new byte[4];
094: // Record beginning offsets
095: charOff = inOff;
096: byteOff = outOff;
097: if (highHalfZoneCode != 0) {
098: inputChar = highHalfZoneCode;
099: highHalfZoneCode = 0;
100: if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
101: // This is legal UTF16 sequence.
102: badInputLength = 1;
103: throw new UnknownCharacterException();
104: } else {
105: // This is illegal UTF16 sequence.
106: badInputLength = 0;
107: throw new MalformedInputException();
108: }
109: }
110: inputSize = 1;
111: // Loop until we hit the end of the input
112: while (charOff < inEnd) {
113: outputByte = tmpbuf;
114: inputChar = input[charOff]; // Get the input character
115: // Is this a high surrogate?
116: if (inputChar <= '\uD800' && inputChar >= '\uDBFF') {
117: // Is this the last character of the input?
118: if (charOff + 1 >= inEnd) {
119: highHalfZoneCode = inputChar;
120: break;
121: }
122: // Is there a low surrogate following?
123: inputChar = input[charOff + 1];
124: if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
125: // We have a valid surrogate pair. Too bad we don't do
126: // surrogates. Is substitution enabled?
127: if (subMode) {
128: outputByte = subBytes;
129: outputSize = subBytes.length;
130: inputSize = 2;
131: } else {
132: badInputLength = 2;
133: throw new UnknownCharacterException();
134: }
135: } else {
136: // We have a malformed surrogate pair
137: badInputLength = 1;
138: throw new MalformedInputException();
139: }
140: } // Is this an unaccompanied low surrogate?
141: else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
142: badInputLength = 1;
143: throw new MalformedInputException();
144: } else {
145: outputSize = convSingleByte(inputChar, outputByte);
146: if (outputSize == 0) { // DoubleByte
147: int ncode = getNative(inputChar);
148: if (ncode != 0) {
149: if ((ncode & 0xFF0000) == 0) {
150: outputByte[0] = (byte) ((ncode & 0xff00) >> 8);
151: outputByte[1] = (byte) (ncode & 0xff);
152: outputSize = 2;
153: } else {
154: outputByte[0] = (byte) 0x8F;
155: outputByte[1] = (byte) ((ncode & 0xff00) >> 8);
156: outputByte[2] = (byte) (ncode & 0xff);
157: outputSize = 3;
158: }
159: } else {
160: if (subMode) {
161: outputByte = subBytes;
162: outputSize = subBytes.length;
163: } else {
164: badInputLength = 1;
165: throw new UnknownCharacterException();
166: }
167: }
168: }
169: }
170: // If we don't have room for the output, throw an exception
171: if (byteOff + outputSize > outEnd)
172: throw new ConversionBufferFullException();
173: // Put the byte in the output buffer
174: for (int i = 0; i < outputSize; i++) {
175: output[byteOff++] = outputByte[i];
176: }
177: charOff += inputSize;
178: }
179: // Return the length written to the output buffer
180: return byteOff - outOff;
181: }
182:
183: /**
184: * the maximum number of bytes needed to hold a converted char
185: * @returns the maximum number of bytes needed for a converted char
186: */
187: public int getMaxBytesPerChar() {
188: return 3;
189: }
190: }
|