001: /**
002: *******************************************************************************
003: * Copyright (C) 2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: *
007: *******************************************************************************
008: */package com.ibm.icu.charset;
009:
010: import java.nio.ByteBuffer;
011: import java.nio.CharBuffer;
012: import java.nio.IntBuffer;
013: import java.nio.charset.CharsetDecoder;
014: import java.nio.charset.CharsetEncoder;
015: import java.nio.charset.CoderResult;
016:
017: import com.ibm.icu.text.UTF16;
018:
019: /**
020: * @author Niti Hantaweepant
021: */
022: class CharsetUTF32 extends CharsetICU {
023:
024: protected byte[] fromUSubstitution = new byte[] { (byte) 0,
025: (byte) 0, (byte) 0xff, (byte) 0xfd };
026:
027: public CharsetUTF32(String icuCanonicalName,
028: String javaCanonicalName, String[] aliases) {
029: super (icuCanonicalName, javaCanonicalName, aliases);
030: maxBytesPerChar = 4;
031: minBytesPerChar = 4;
032: maxCharsPerByte = 1;
033: }
034:
035: class CharsetDecoderUTF32 extends CharsetDecoderICU {
036:
037: public CharsetDecoderUTF32(CharsetICU cs) {
038: super (cs);
039: }
040:
041: protected CoderResult decodeLoop(ByteBuffer source,
042: CharBuffer target, IntBuffer offsets, boolean flush) {
043: CoderResult cr = CoderResult.UNDERFLOW;
044:
045: int sourceArrayIndex = source.position();
046: int ch, i;
047:
048: donefornow: {
049: /* UTF-8 returns here for only non-offset, this needs to change.*/
050: if (toUnicodeStatus != 0 && target.hasRemaining()) {
051: i = toULength; /* restore # of bytes consumed */
052:
053: ch = (int) (toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/
054: toUnicodeStatus = 0;
055: toULength = 0;
056:
057: while (i < 4) {
058: if (sourceArrayIndex < source.limit()) {
059: ch = (ch << 8)
060: | ((byte) (source
061: .get(sourceArrayIndex)) & UConverterConstants.UNSIGNED_BYTE_MASK);
062: toUBytesArray[i++] = (byte) source
063: .get(sourceArrayIndex++);
064: } else {
065: /* stores a partially calculated target*/
066: /* + 1 to make 0 a valid character */
067: toUnicodeStatus = ch + 1;
068: toULength = (byte) i;
069: break donefornow;
070: }
071: }
072:
073: if (ch <= UConverterConstants.MAXIMUM_UTF
074: && !isSurrogate(ch)) {
075: /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
076: if (ch <= UConverterConstants.MAXIMUM_UCS2) {
077: /* fits in 16 bits */
078: target.put((char) ch);
079: } else {
080: /* write out the surrogates */
081: target.put(UTF16.getLeadSurrogate(ch));
082: ch = UTF16.getTrailSurrogate(ch);
083: if (target.hasRemaining()) {
084: target.put((char) ch);
085: } else {
086: /* Put in overflow buffer (not handled here) */
087: charErrorBufferArray[0] = (char) ch;
088: charErrorBufferLength = 1;
089: cr = CoderResult.OVERFLOW;
090: }
091: }
092: } else {
093: toULength = (byte) i;
094: cr = CoderResult
095: .malformedForLength(sourceArrayIndex);
096: break donefornow;
097: }
098: }
099:
100: while (sourceArrayIndex < source.limit()
101: && target.hasRemaining()) {
102: i = 0;
103: ch = 0;
104:
105: while (i < 4) {
106: if (sourceArrayIndex < source.limit()) {
107: ch = (ch << 8)
108: | ((byte) (source
109: .get(sourceArrayIndex)) & UConverterConstants.UNSIGNED_BYTE_MASK);
110: toUBytesArray[i++] = (byte) source
111: .get(sourceArrayIndex++);
112: } else {
113: /* stores a partially calculated target*/
114: /* + 1 to make 0 a valid character */
115: toUnicodeStatus = ch + 1;
116: toULength = (byte) i;
117: break donefornow;
118: }
119: }
120:
121: if (ch <= UConverterSharedData.MAXIMUM_UTF
122: && !isSurrogate(ch)) {
123: /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
124: if (ch <= UConverterSharedData.MAXIMUM_UCS2) {
125: /* fits in 16 bits */
126: target.put((char) ch);
127: } else {
128: /* write out the surrogates */
129: target.put(UTF16.getLeadSurrogate(ch));
130: ch = UTF16.getTrailSurrogate(ch);
131: if (target.hasRemaining()) {
132: target.put((char) ch);
133: } else {
134: /* Put in overflow buffer (not handled here) */
135: charErrorBufferArray[0] = (char) ch;
136: charErrorBufferLength = 1;
137: cr = CoderResult.OVERFLOW;
138: break;
139: }
140: }
141: } else {
142: toULength = (byte) i;
143: cr = CoderResult
144: .malformedForLength(sourceArrayIndex);
145: break;
146: }
147: }
148: }
149:
150: if (sourceArrayIndex < source.limit()
151: && !target.hasRemaining()) {
152: /* End of target buffer */
153: cr = CoderResult.OVERFLOW;
154: }
155:
156: source.position(sourceArrayIndex);
157: return cr;
158: }
159: }
160:
161: class CharsetEncoderUTF32 extends CharsetEncoderICU {
162:
163: public CharsetEncoderUTF32(CharsetICU cs) {
164: super (cs, fromUSubstitution);
165: implReset();
166: }
167:
168: private final static int NEED_TO_WRITE_BOM = 1;
169:
170: protected void implReset() {
171: super .implReset();
172: fromUnicodeStatus = NEED_TO_WRITE_BOM;
173: }
174:
175: protected CoderResult encodeLoop(CharBuffer source,
176: ByteBuffer target, IntBuffer offsets, boolean flush) {
177: CoderResult cr = CoderResult.UNDERFLOW;
178: if (!source.hasRemaining()) {
179: /* no input, nothing to do */
180: return cr;
181: }
182:
183: /* write the BOM if necessary */
184: if (fromUnicodeStatus == NEED_TO_WRITE_BOM) {
185: byte[] bom = { 0, 0, (byte) 0xfe, (byte) 0xff };
186: cr = fromUWriteBytes(this , bom, 0, bom.length, target,
187: offsets, -1);
188: if (cr.isError()) {
189: return cr;
190: }
191: fromUnicodeStatus = 0;
192: }
193:
194: int ch, ch2;
195: int indexToWrite;
196: byte temp[] = new byte[4];
197: temp[0] = 0;
198: int sourceArrayIndex = source.position();
199:
200: boolean doloop = true;
201: if (fromUChar32 != 0) {
202: ch = fromUChar32;
203: fromUChar32 = 0;
204: //lowsurogate:
205: if (sourceArrayIndex < source.limit()) {
206: ch2 = source.get(sourceArrayIndex);
207: if (UTF16.isTrailSurrogate((char) ch2)) {
208: ch = ((ch - UConverterConstants.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT)
209: + ch2
210: + UConverterSharedData.SURROGATE_LOW_BASE;
211: sourceArrayIndex++;
212: } else {
213: /* this is an unmatched trail code unit (2nd surrogate) */
214: /* callback(illegal) */
215: fromUChar32 = ch;
216: cr = CoderResult
217: .malformedForLength(sourceArrayIndex);
218: doloop = false;
219: }
220: } else {
221: /* ran out of source */
222: fromUChar32 = ch;
223: if (flush) {
224: /* this is an unmatched trail code unit (2nd surrogate) */
225: /* callback(illegal) */
226: cr = CoderResult
227: .malformedForLength(sourceArrayIndex);
228: }
229: doloop = false;
230: }
231:
232: /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
233: temp[1] = (byte) (ch >>> 16 & 0x1F);
234: temp[2] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
235: temp[3] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */
236:
237: for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
238: if (target.hasRemaining()) {
239: target.put(temp[indexToWrite]);
240: } else {
241: errorBuffer[errorBufferLength++] = temp[indexToWrite];
242: cr = CoderResult.OVERFLOW;
243: }
244: }
245: }
246:
247: if (doloop) {
248: while (sourceArrayIndex < source.limit()
249: && target.hasRemaining()) {
250: ch = source.get(sourceArrayIndex++);
251:
252: if (UTF16.isSurrogate((char) ch)) {
253: if (UTF16.isLeadSurrogate((char) ch)) {
254: //lowsurogate:
255: if (sourceArrayIndex < source.limit()) {
256: ch2 = source.get(sourceArrayIndex);
257: if (UTF16.isTrailSurrogate((char) ch2)) {
258: ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT)
259: + ch2
260: + UConverterSharedData.SURROGATE_LOW_BASE;
261: sourceArrayIndex++;
262: } else {
263: /* this is an unmatched trail code unit (2nd surrogate) */
264: /* callback(illegal) */
265: fromUChar32 = ch;
266: cr = CoderResult.OVERFLOW;
267: break;
268: }
269: } else {
270: /* ran out of source */
271: fromUChar32 = ch;
272: if (flush) {
273: /* this is an unmatched trail code unit (2nd surrogate) */
274: /* callback(illegal) */
275: cr = CoderResult
276: .malformedForLength(sourceArrayIndex);
277: }
278: break;
279: }
280: } else {
281: fromUChar32 = ch;
282: cr = CoderResult
283: .malformedForLength(sourceArrayIndex);
284: break;
285: }
286: }
287:
288: /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
289: temp[1] = (byte) (ch >>> 16 & 0x1F);
290: temp[2] = (byte) (ch >>> 8); /* unsigned cast implicitly does (ch & FF) */
291: temp[3] = (byte) (ch); /* unsigned cast implicitly does (ch & FF) */
292:
293: for (indexToWrite = 0; indexToWrite <= 3; indexToWrite++) {
294: if (target.hasRemaining()) {
295: target.put(temp[indexToWrite]);
296: } else {
297: errorBuffer[errorBufferLength++] = temp[indexToWrite];
298: cr = CoderResult.OVERFLOW;
299: }
300: }
301: }
302: }
303:
304: if (sourceArrayIndex < source.limit()
305: && !target.hasRemaining()) {
306: cr = CoderResult.OVERFLOW;
307: }
308: source.position(sourceArrayIndex);
309: return cr;
310: }
311: }
312:
313: public CharsetDecoder newDecoder() {
314: return new CharsetDecoderUTF32(this );
315: }
316:
317: public CharsetEncoder newEncoder() {
318: return new CharsetEncoderUTF32(this);
319: }
320: }
|