001: /*
002: * Copyright 2000 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package sun.io;
027:
/**
 * Class for converting characters to bytes for the EUC-JP encoding on
 * Linux. This converter supports the JIS0201 and JIS0208 character sets
 * and omits support for the JIS0212 character set.
 *
 * @author Naveen Sanjeeva
 */
035:
036: public class CharToByteEUC_JP_LINUX extends CharToByteJIS0208 {
037: CharToByteJIS0201 cbJIS0201 = new CharToByteJIS0201();
038:
039: public String getCharacterEncoding() {
040: return "EUC_JP_LINUX";
041: }
042:
043: protected int convSingleByte(char inputChar, byte[] outputByte) {
044: byte b;
045:
046: if (inputChar == 0) {
047: outputByte[0] = (byte) 0;
048: return 1;
049: }
050:
051: if ((b = cbJIS0201.getNative(inputChar)) == 0)
052: return 0;
053:
054: if (b > 0 && b < 128) {
055: outputByte[0] = b;
056: return 1;
057: }
058: outputByte[0] = (byte) 0x8E;
059: outputByte[1] = b;
060: return 2;
061: }
062:
063: protected int getNative(char ch) {
064: int offset = index1[((ch & 0xff00) >> 8)] << 8;
065: int r = index2[offset >> 12].charAt((offset & 0xfff)
066: + (ch & 0xff));
067: if (r != 0)
068: return r + 0x8080;
069: return r;
070: }
071:
072: /**
073: * Converts characters to sequences of bytes.
074: * Conversions that result in Exceptions can be restarted by calling
075: * convert again, with appropriately modified parameters.
076: * @return the characters written to output.
077: * @param input char array containing text in Unicode
078: * @param inStart offset in input array
079: * @param inEnd offset of last byte to be converted
080: * @param output byte array to receive conversion result
081: * @param outStart starting offset
082: * @param outEnd offset of last byte to be written to
083: * @throw UnsupportedCharacterException for any character
084: * that cannot be converted to the external character set.
085: */
086: public int convert(char[] input, int inOff, int inEnd,
087: byte[] output, int outOff, int outEnd)
088: throws MalformedInputException, UnknownCharacterException,
089: ConversionBufferFullException {
090: char inputChar; // Input character to be converted
091: byte[] outputByte; // Output byte written to output
092: int inputSize = 0; // Size of input
093: int outputSize = 0; // Size of output
094: byte[] tmpbuf = new byte[4];
095:
096: // Record beginning offsets
097: charOff = inOff;
098: byteOff = outOff;
099:
100: if (highHalfZoneCode != 0) {
101: inputChar = highHalfZoneCode;
102: highHalfZoneCode = 0;
103: if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
104: // This is legal UTF16 sequence.
105: badInputLength = 1;
106: throw new UnknownCharacterException();
107: } else {
108: // This is illegal UTF16 sequence.
109: badInputLength = 0;
110: throw new MalformedInputException();
111: }
112: }
113:
114: // Loop until we hit the end of the input
115: while (charOff < inEnd) {
116: inputSize = 1;
117: outputByte = tmpbuf;
118: inputChar = input[charOff]; // Get the input character
119:
120: // Is this a high surrogate?
121: if (inputChar >= '\uD800' && inputChar <= '\uDBFF') {
122: // Is this the last character of the input?
123: if (charOff + 1 >= inEnd) {
124: highHalfZoneCode = inputChar;
125: break;
126: }
127:
128: // Is there a low surrogate following?
129: inputChar = input[charOff + 1];
130: if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
131: // We have a valid surrogate pair. Too bad we don't do
132: // surrogates. Is substitution enabled?
133: if (subMode) {
134: outputByte = subBytes;
135: outputSize = subBytes.length;
136: inputSize = 2;
137: } else {
138: badInputLength = 2;
139: throw new UnknownCharacterException();
140: }
141: } else {
142: // We have a malformed surrogate pair
143: badInputLength = 1;
144: throw new MalformedInputException();
145: }
146: }
147: // Is this an unaccompanied low surrogate?
148: else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
149: badInputLength = 1;
150: throw new MalformedInputException();
151: } else {
152: outputSize = convSingleByte(inputChar, outputByte);
153: if (outputSize == 0) { // DoubleByte
154: int ncode = getNative(inputChar);
155: if (ncode != 0 && ((ncode & 0xFF0000) == 0)) {
156: outputByte[0] = (byte) ((ncode & 0xff00) >> 8);
157: outputByte[1] = (byte) (ncode & 0xff);
158: outputSize = 2;
159: } else {
160: if (subMode) {
161: outputByte = subBytes;
162: outputSize = subBytes.length;
163: } else {
164: badInputLength = 1;
165: throw new UnknownCharacterException();
166: }
167: }
168: }
169: }
170:
171: // If we don't have room for the output, throw an exception
172: if (byteOff + outputSize > outEnd)
173: throw new ConversionBufferFullException();
174:
175: // Put the byte in the output buffer
176: for (int i = 0; i < outputSize; i++) {
177: output[byteOff++] = outputByte[i];
178: }
179: charOff += inputSize;
180: }
181: // Return the length written to the output buffer
182: return byteOff - outOff;
183: }
184:
185: /**
186: * the maximum number of bytes needed to hold a converted char
187: * @returns the maximum number of bytes needed for a converted char
188: */
189: public int getMaxBytesPerChar() {
190: return 2;
191: }
192: }
|