001: /**
002: *******************************************************************************
003: * Copyright (C) 2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: *
007: *******************************************************************************
008: */package com.ibm.icu.charset;
009:
010: import java.nio.ByteBuffer;
011: import java.nio.CharBuffer;
012: import java.nio.IntBuffer;
013: import java.nio.charset.CharsetDecoder;
014: import java.nio.charset.CharsetEncoder;
015: import java.nio.charset.CoderResult;
016:
017: import com.ibm.icu.text.UTF16;
018:
019: /**
020: * @author Niti Hantaweepant
021: */
022: class CharsetUTF32LE extends CharsetICU {
023:
024: protected byte[] fromUSubstitution = new byte[] { (byte) 0xfd,
025: (byte) 0xff, (byte) 0, (byte) 0 };
026:
027: public CharsetUTF32LE(String icuCanonicalName,
028: String javaCanonicalName, String[] aliases) {
029: super (icuCanonicalName, javaCanonicalName, aliases);
030: maxBytesPerChar = 4;
031: minBytesPerChar = 4;
032: maxCharsPerByte = 1;
033: }
034:
035: class CharsetDecoderUTF32LE extends CharsetDecoderICU {
036:
037: public CharsetDecoderUTF32LE(CharsetICU cs) {
038: super (cs);
039: }
040:
041: protected CoderResult decodeLoop(ByteBuffer source,
042: CharBuffer target, IntBuffer offsets, boolean flush) {
043: CoderResult cr = CoderResult.UNDERFLOW;
044:
045: int sourceArrayIndex = source.position();
046: int ch, i;
047:
048: donefornow: {
049: /* UTF-8 returns here for only non-offset, this needs to change.*/
050: if (toUnicodeStatus != 0 && target.hasRemaining()) {
051: i = toULength; /* restore # of bytes consumed */
052:
053: ch = (int) (toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/
054: toUnicodeStatus = 0;
055: toULength = 0;
056:
057: while (i < 4) {
058: if (sourceArrayIndex < source.limit()) {
059: ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
060: toUBytesArray[i++] = (byte) source
061: .get(sourceArrayIndex++);
062: } else {
063: /* stores a partially calculated target*/
064: /* + 1 to make 0 a valid character */
065: toUnicodeStatus = ch + 1;
066: toULength = (byte) i;
067: break donefornow;
068: }
069: }
070:
071: if (ch <= UConverterConstants.MAXIMUM_UTF
072: && !isSurrogate(ch)) {
073: /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
074: if (ch <= UConverterConstants.MAXIMUM_UCS2) {
075: /* fits in 16 bits */
076: target.put((char) ch);
077: } else {
078: /* write out the surrogates */
079: target.put(UTF16.getLeadSurrogate(ch));
080: ch = UTF16.getTrailSurrogate(ch);
081: if (target.hasRemaining()) {
082: target.put((char) ch);
083: } else {
084: /* Put in overflow buffer (not handled here) */
085: charErrorBufferArray[0] = (char) ch;
086: charErrorBufferLength = 1;
087: cr = CoderResult.OVERFLOW;
088: }
089: }
090: } else {
091: toULength = (byte) i;
092: cr = CoderResult
093: .malformedForLength(sourceArrayIndex);
094: break donefornow;
095: }
096: }
097:
098: while (sourceArrayIndex < source.limit()
099: && target.hasRemaining()) {
100: i = 0;
101: ch = 0;
102:
103: while (i < 4) {
104: if (sourceArrayIndex < source.limit()) {
105: ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
106: toUBytesArray[i++] = (byte) source
107: .get(sourceArrayIndex++);
108: } else {
109: /* stores a partially calculated target*/
110: /* + 1 to make 0 a valid character */
111: toUnicodeStatus = ch + 1;
112: toULength = (byte) i;
113: break donefornow;
114: }
115: }
116:
117: if (ch <= UConverterSharedData.MAXIMUM_UTF
118: && !isSurrogate(ch)) {
119: /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
120: if (ch <= UConverterSharedData.MAXIMUM_UCS2) {
121: /* fits in 16 bits */
122: target.put((char) ch);
123: } else {
124: /* write out the surrogates */
125: target.put(UTF16.getLeadSurrogate(ch));
126: ch = UTF16.getTrailSurrogate(ch);
127: if (target.hasRemaining()) {
128: target.put((char) ch);
129: } else {
130: /* Put in overflow buffer (not handled here) */
131: charErrorBufferArray[0] = (char) ch;
132: charErrorBufferLength = 1;
133: cr = CoderResult.OVERFLOW;
134: break;
135: }
136: }
137: } else {
138: toULength = (byte) i;
139: cr = CoderResult
140: .malformedForLength(sourceArrayIndex);
141: break;
142: }
143: }
144: }
145:
146: if (sourceArrayIndex < source.limit()
147: && !target.hasRemaining()) {
148: /* End of target buffer */
149: cr = CoderResult.OVERFLOW;
150: }
151:
152: source.position(sourceArrayIndex);
153: return cr;
154: }
155: }
156:
157: class CharsetEncoderUTF32LE extends CharsetEncoderICU {
158:
159: public CharsetEncoderUTF32LE(CharsetICU cs) {
160: super (cs, fromUSubstitution);
161: implReset();
162: }
163:
164: private final static int NEED_TO_WRITE_BOM = 1;
165:
166: protected void implReset() {
167: super .implReset();
168: fromUnicodeStatus = NEED_TO_WRITE_BOM;
169: }
170:
171: protected CoderResult encodeLoop(CharBuffer source,
172: ByteBuffer target, IntBuffer offsets, boolean flush) {
173: CoderResult cr = CoderResult.UNDERFLOW;
174: if (!source.hasRemaining()) {
175: /* no input, nothing to do */
176: return cr;
177: }
178:
179: /* write the BOM if necessary */
180: if (fromUnicodeStatus == NEED_TO_WRITE_BOM) {
181: byte[] bom = { (byte) 0xff, (byte) 0xfe, 0, 0 };
182: cr = fromUWriteBytes(this , bom, 0, bom.length, target,
183: offsets, -1);
184: if (cr.isError()) {
185: return cr;
186: }
187: fromUnicodeStatus = 0;
188: }
189:
190: int ch, ch2;
191: int indexToWrite;
192: byte temp[] = new byte[4];
193: temp[3] = 0;
194: int sourceArrayIndex = source.position();
195:
196: boolean doloop = true;
197: if (fromUChar32 != 0) {
198: ch = fromUChar32;
199: fromUChar32 = 0;
200: //lowsurogate:
201: if (sourceArrayIndex < source.limit()) {
202: ch2 = source.get(sourceArrayIndex);
203: if (UTF16.isTrailSurrogate((char) ch2)) {
204: ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT)
205: + ch2
206: + UConverterSharedData.SURROGATE_LOW_BASE;
207: sourceArrayIndex++;
208: } else {
209: /* this is an unmatched trail code unit (2nd surrogate) */
210: /* callback(illegal) */
211: fromUChar32 = ch;
212: cr = CoderResult
213: .malformedForLength(sourceArrayIndex);
214: doloop = false;
215: }
216: } else {
217: /* ran out of source */
218: fromUChar32 = ch;
219: if (flush) {
220: /* this is an unmatched trail code unit (2nd surrogate) */
221: /* callback(illegal) */
222: cr = CoderResult
223: .malformedForLength(sourceArrayIndex);
224: }
225: doloop = false;
226: }
227:
228: /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
229: temp[2] = (byte) (ch >>> 16 & 0x1F);
230: temp[1] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
231: temp[0] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */
232:
233: for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
234: if (target.hasRemaining()) {
235: target.put(temp[indexToWrite]);
236: } else {
237: errorBuffer[errorBufferLength++] = temp[indexToWrite];
238: cr = CoderResult.OVERFLOW;
239: }
240: }
241: }
242:
243: if (doloop) {
244: while (sourceArrayIndex < source.limit()
245: && target.hasRemaining()) {
246: ch = source.get(sourceArrayIndex++);
247:
248: if (UTF16.isSurrogate((char) ch)) {
249: if (UTF16.isLeadSurrogate((char) ch)) {
250: //lowsurogate:
251: if (sourceArrayIndex < source.limit()) {
252: ch2 = source.get(sourceArrayIndex);
253: if (UTF16.isTrailSurrogate((char) ch2)) {
254: ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT)
255: + ch2
256: + UConverterSharedData.SURROGATE_LOW_BASE;
257: sourceArrayIndex++;
258: } else {
259: /* this is an unmatched trail code unit (2nd surrogate) */
260: /* callback(illegal) */
261: fromUChar32 = ch;
262: cr = CoderResult.OVERFLOW;
263: break;
264: }
265: } else {
266: /* ran out of source */
267: fromUChar32 = ch;
268: if (flush) {
269: /* this is an unmatched trail code unit (2nd surrogate) */
270: /* callback(illegal) */
271: cr = CoderResult
272: .malformedForLength(sourceArrayIndex);
273: }
274: break;
275: }
276: } else {
277: fromUChar32 = ch;
278: cr = CoderResult
279: .malformedForLength(sourceArrayIndex);
280: break;
281: }
282: }
283:
284: /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
285: temp[2] = (byte) (ch >>> 16 & 0x1F);
286: temp[1] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
287: temp[0] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */
288:
289: for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
290: if (target.hasRemaining()) {
291: target.put(temp[indexToWrite]);
292: } else {
293: errorBuffer[errorBufferLength++] = temp[indexToWrite];
294: cr = CoderResult.OVERFLOW;
295: }
296: }
297: }
298: }
299:
300: if (sourceArrayIndex < source.limit()
301: && !target.hasRemaining()) {
302: cr = CoderResult.OVERFLOW;
303: }
304: source.position(sourceArrayIndex);
305: return cr;
306: }
307: }
308:
309: public CharsetDecoder newDecoder() {
310: return new CharsetDecoderUTF32LE(this );
311: }
312:
313: public CharsetEncoder newEncoder() {
314: return new CharsetEncoderUTF32LE(this);
315: }
316: }
|