001: /*
002: * Copyright 1999 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package sun.io;
027:
028: import sun.nio.cs.ext.ISCII91;
029:
030: /*
031: * Copyright (c) 1998 International Business Machines.
032: * All Rights Reserved.
033: *
034: * Author : Sunanda Bera, C. Thirumalesh
035: * Last Modified : 23,November,1998
036: *
037: * Purpose : Defines class ByteToCharISCII91.
038: *
039: *
040: * Revision History
041: * ======== =======
042: *
043: * Date By Description
044: * ---- -- -----------
045: *
046: *
047: */
048:
049: /**
050: * Converter class. Converts between Unicode encoding and ISCII91 encoding.
051: * ISCII91 is the character encoding as defined in Indian Standard document
052: * IS 13194:1991 ( Indian Script Code for Information Interchange ).
053: *
054: * @see sun.io.ByteToCharConverter
055: */
056: public class ByteToCharISCII91 extends ByteToCharConverter {
057:
058: private static final char[] directMapTable = ISCII91
059: .getDirectMapTable();
060:
061: private static final char NUKTA_CHAR = '\u093c';
062: private static final char HALANT_CHAR = '\u094d';
063: private static final char ZWNJ_CHAR = '\u200c';
064: private static final char ZWJ_CHAR = '\u200d';
065: private static final char INVALID_CHAR = '\uffff';
066:
067: private char contextChar = INVALID_CHAR;
068: private boolean needFlushing = false;
069:
070: /**
071: * Converts ISCII91 characters to Unicode.
072: * @see sun.io.ByteToCharConverter#convert
073: */
074: public int convert(byte input[], int inStart, int inEnd,
075: char output[], int outStart, int outEnd)
076: throws ConversionBufferFullException,
077: UnknownCharacterException {
078: /*Rules:
079: * 1)ATR,EXT,following character to be replaced with '\ufffd'
080: * 2)Halant + Halant => '\u094d' (Virama) + '\u200c'(ZWNJ)
081: * 3)Halant + Nukta => '\u094d' (Virama) + '\u200d'(ZWJ)
082: */
083: charOff = outStart;
084: byteOff = inStart;
085: while (byteOff < inEnd) {
086: if (charOff >= outEnd) {
087: throw new ConversionBufferFullException();
088: }
089: int index = input[byteOff++];
090: index = (index < 0) ? (index + 255) : index;
091: char currentChar = directMapTable[index];
092:
093: // if the contextChar is either ATR || EXT set the output to '\ufffd'
094: if (contextChar == '\ufffd') {
095: output[charOff++] = '\ufffd';
096: contextChar = INVALID_CHAR;
097: needFlushing = false;
098: continue;
099: }
100:
101: switch (currentChar) {
102: case '\u0901':
103: case '\u0907':
104: case '\u0908':
105: case '\u090b':
106: case '\u093f':
107: case '\u0940':
108: case '\u0943':
109: case '\u0964':
110: if (needFlushing) {
111: output[charOff++] = contextChar;
112: contextChar = currentChar;
113: continue;
114: }
115: contextChar = currentChar;
116: needFlushing = true;
117: continue;
118: case NUKTA_CHAR:
119: switch (contextChar) {
120: case '\u0901':
121: output[charOff] = '\u0950';
122: break;
123: case '\u0907':
124: output[charOff] = '\u090c';
125: break;
126: case '\u0908':
127: output[charOff] = '\u0961';
128: break;
129: case '\u090b':
130: output[charOff] = '\u0960';
131: break;
132: case '\u093f':
133: output[charOff] = '\u0962';
134: break;
135: case '\u0940':
136: output[charOff] = '\u0963';
137: break;
138: case '\u0943':
139: output[charOff] = '\u0944';
140: break;
141: case '\u0964':
142: output[charOff] = '\u093d';
143: break;
144: case HALANT_CHAR:
145: if (needFlushing) {
146: output[charOff++] = contextChar;
147: contextChar = currentChar;
148: continue;
149: }
150: output[charOff] = ZWJ_CHAR;
151: break;
152: default:
153: if (needFlushing) {
154: output[charOff++] = contextChar;
155: contextChar = currentChar;
156: continue;
157: }
158: output[charOff] = NUKTA_CHAR;
159: }
160: break;
161: case HALANT_CHAR:
162: if (needFlushing) {
163: output[charOff++] = contextChar;
164: contextChar = currentChar;
165: continue;
166: }
167: if (contextChar == HALANT_CHAR) {
168: output[charOff] = ZWNJ_CHAR;
169: break;
170: }
171: output[charOff] = HALANT_CHAR;
172: break;
173: case INVALID_CHAR:
174: if (needFlushing) {
175: output[charOff++] = contextChar;
176: contextChar = currentChar;
177: continue;
178: }
179: if (subMode) {
180: output[charOff] = subChars[0];
181: break;
182: } else {
183: contextChar = INVALID_CHAR;
184: throw new UnknownCharacterException();
185: }
186: default:
187: if (needFlushing) {
188: output[charOff++] = contextChar;
189: contextChar = currentChar;
190: continue;
191: }
192: output[charOff] = currentChar;
193: break;
194: }//end switch
195:
196: contextChar = currentChar;
197: needFlushing = false;
198: charOff++;
199: }//end while
200: return charOff - outStart;
201: } //convert()
202:
203: /**
204: * @see sun.io.ByteToCharConverter#flush
205: */
206: public int flush(char[] output, int outStart, int outEnd)
207: throws MalformedInputException,
208: ConversionBufferFullException {
209: int charsWritten = 0;
210: //if the last char was not flushed, flush it!
211: if (needFlushing) {
212: output[outStart] = contextChar;
213: charsWritten = 1;
214: }
215: contextChar = INVALID_CHAR;
216: needFlushing = false;
217: byteOff = charOff = 0;
218: return charsWritten;
219: }//flush()
220:
221: /**
222: * Returns the character set id for the conversion.
223: */
224: public String getCharacterEncoding() {
225: return "ISCII91";
226: }//getCharacterEncoding()
227:
228: /**
229: * @see sun.io.ByteToCharConverter#reset
230: */
231: public void reset() {
232: byteOff = charOff = 0;
233: }//reset()
234:
235: }//end of class definition
|