001: /*
002: * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
003: * Copyright (C) 2005 - Javolution (http://javolution.org/)
004: * All rights reserved.
005: *
006: * Permission to use, copy, modify, and distribute this software is
007: * freely granted, provided that this notice is preserved.
008: */
009: package javolution.io;
010:
011: import j2me.lang.IllegalStateException;
012: import j2me.io.CharConversionException;
013: import j2me.nio.BufferUnderflowException;
014: import j2me.nio.ByteBuffer;
015:
016: import java.io.IOException;
017: import java.io.Reader;
018:
019: import javolution.lang.Reusable;
020: import javolution.text.Appendable;
021:
022: /**
023: * <p> This class represents a UTF-8 <code>j2me.nio.ByteBuffer</code> reader.
024: * </p>
025: *
026: * <p> This reader can be used for efficient decoding of native byte
027: * buffers (e.g. <code>MappedByteBuffer</code>), high-performance
028: * messaging (no intermediate buffer), etc.</p>
029: *
030: * <p> This reader supports surrogate <code>char</code> pairs (representing
031: * characters in the range [U+10000 .. U+10FFFF]). It can also be used
032: * to read characters unicodes (31 bits) directly
033: * (ref. {@link #read()}).</p>
034: *
035: * <p> Each invocation of one of the <code>read()</code> methods may cause one
036: * or more bytes to be read from the underlying byte buffer.
037: * The end of stream is reached when the byte buffer position and limit
038: * coincide.</p>
039: *
040: * @author <a href="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
041: * @version 2.0, December 9, 2004
042: * @see UTF8ByteBufferWriter
043: */
044: public final class UTF8ByteBufferReader extends Reader implements
045: Reusable {
046:
047: /**
048: * Holds the byte buffer source.
049: */
050: private ByteBuffer _byteBuffer;
051:
052: /**
053: * Default constructor.
054: */
055: public UTF8ByteBufferReader() {
056: }
057:
058: /**
059: * Sets the <code>ByteBuffer</code> to use for reading available bytes
060: * from current buffer position.
061: *
062: * @param byteBuffer the <code>ByteBuffer</code> source.
063: * @return this UTF-8 reader.
064: * @throws IllegalStateException if this reader is being reused and
065: * it has not been {@link #close closed} or {@link #reset reset}.
066: */
067: public UTF8ByteBufferReader setInput(ByteBuffer byteBuffer) {
068: if (_byteBuffer != null)
069: throw new IllegalStateException(
070: "Reader not closed or reset");
071: _byteBuffer = byteBuffer;
072: return this ;
073: }
074:
075: /**
076: * Indicates if this stream is ready to be read.
077: *
078: * @return <code>true</code> if the byte buffer has remaining bytes to
079: * read; <code>false</code> otherwise.
080: * @throws IOException if an I/O error occurs.
081: */
082: public boolean ready() throws IOException {
083: if (_byteBuffer != null) {
084: return _byteBuffer.hasRemaining();
085: } else {
086: throw new IOException("Reader closed");
087: }
088: }
089:
090: /**
091: * Closes and {@link #reset resets} this reader for reuse.
092: *
093: * @throws IOException if an I/O error occurs.
094: */
095: public void close() throws IOException {
096: if (_byteBuffer != null) {
097: reset();
098: }
099: }
100:
101: /**
102: * Reads a single character. This method does not block, <code>-1</code>
103: * is returned if the buffer's limit has been reached.
104: *
105: * @return the 31-bits Unicode of the character read, or -1 if there is
106: * no more remaining bytes to be read.
107: * @throws IOException if an I/O error occurs (e.g. incomplete
108: * character sequence being read).
109: */
110: public int read() throws IOException {
111: if (_byteBuffer != null) {
112: if (_byteBuffer.hasRemaining()) {
113: byte b = _byteBuffer.get();
114: return (b >= 0) ? b : read2(b);
115: } else {
116: return -1;
117: }
118: } else {
119: throw new IOException("Reader closed");
120: }
121: }
122:
123: // Reads one full character, throws CharConversionException if limit reached.
124: private int read2(byte b) throws IOException {
125: try {
126: // Decodes UTF-8.
127: if ((b >= 0) && (_moreBytes == 0)) {
128: // 0xxxxxxx
129: return b;
130: } else if (((b & 0xc0) == 0x80) && (_moreBytes != 0)) {
131: // 10xxxxxx (continuation byte)
132: _code = (_code << 6) | (b & 0x3f); // Adds 6 bits to code.
133: if (--_moreBytes == 0) {
134: return _code;
135: } else {
136: return read2(_byteBuffer.get());
137: }
138: } else if (((b & 0xe0) == 0xc0) && (_moreBytes == 0)) {
139: // 110xxxxx
140: _code = b & 0x1f;
141: _moreBytes = 1;
142: return read2(_byteBuffer.get());
143: } else if (((b & 0xf0) == 0xe0) && (_moreBytes == 0)) {
144: // 1110xxxx
145: _code = b & 0x0f;
146: _moreBytes = 2;
147: return read2(_byteBuffer.get());
148: } else if (((b & 0xf8) == 0xf0) && (_moreBytes == 0)) {
149: // 11110xxx
150: _code = b & 0x07;
151: _moreBytes = 3;
152: return read2(_byteBuffer.get());
153: } else if (((b & 0xfc) == 0xf8) && (_moreBytes == 0)) {
154: // 111110xx
155: _code = b & 0x03;
156: _moreBytes = 4;
157: return read2(_byteBuffer.get());
158: } else if (((b & 0xfe) == 0xfc) && (_moreBytes == 0)) {
159: // 1111110x
160: _code = b & 0x01;
161: _moreBytes = 5;
162: return read2(_byteBuffer.get());
163: } else {
164: throw new CharConversionException(
165: "Invalid UTF-8 Encoding");
166: }
167: } catch (BufferUnderflowException e) {
168: throw new CharConversionException("Incomplete Sequence");
169: }
170: }
171:
172: private int _code;
173:
174: private int _moreBytes;
175:
176: /**
177: * Reads characters into a portion of an array. This method does not
178: * block.
179: *
180: * <p> Note: Characters between U+10000 and U+10FFFF are represented
181: * by surrogate pairs (two <code>char</code>).</p>
182: *
183: * @param cbuf the destination buffer.
184: * @param off the offset at which to start storing characters.
185: * @param len the maximum number of characters to read
186: * @return the number of characters read, or -1 if there is no more
187: * byte remaining.
188: * @throws IOException if an I/O error occurs.
189: */
190: public int read(char cbuf[], int off, int len) throws IOException {
191: if (_byteBuffer == null)
192: throw new IOException("Reader closed");
193: final int off_plus_len = off + len;
194: int remaining = _byteBuffer.remaining();
195: if (remaining <= 0)
196: return -1;
197: for (int i = off; i < off_plus_len;) {
198: if (remaining-- > 0) {
199: byte b = _byteBuffer.get();
200: if (b >= 0) {
201: cbuf[i++] = (char) b; // Most common case.
202: } else {
203: if (i < off_plus_len - 1) { // Up to two 'char' can be read.
204: int code = read2(b);
205: remaining = _byteBuffer.remaining(); // Recalculates.
206: if (code < 0x10000) {
207: cbuf[i++] = (char) code;
208: } else if (code <= 0x10ffff) { // Surrogates.
209: cbuf[i++] = (char) (((code - 0x10000) >> 10) + 0xd800);
210: cbuf[i++] = (char) (((code - 0x10000) & 0x3ff) + 0xdc00);
211: } else {
212: throw new CharConversionException(
213: "Cannot convert U+"
214: + Integer.toHexString(code)
215: + " to char (code greater than U+10FFFF)");
216: }
217: } else { // Not enough space in destination (go back).
218: _byteBuffer
219: .position(_byteBuffer.position() - 1);
220: remaining++;
221: return i - off;
222: }
223: }
224: } else {
225: return i - off;
226: }
227: }
228: return len;
229: }
230:
231: /**
232: * Reads characters into the specified appendable. This method does not
233: * block.
234: *
235: * <p> Note: Characters between U+10000 and U+10FFFF are represented
236: * by surrogate pairs (two <code>char</code>).</p>
237: *
238: * @param dest the destination buffer.
239: * @throws IOException if an I/O error occurs.
240: */
241: public void read(Appendable dest) throws IOException {
242: if (_byteBuffer == null)
243: throw new IOException("Reader closed");
244: while (_byteBuffer.hasRemaining()) {
245: byte b = _byteBuffer.get();
246: if (b >= 0) {
247: dest.append((char) b); // Most common case.
248: } else {
249: int code = read2(b);
250: if (code < 0x10000) {
251: dest.append((char) code);
252: } else if (code <= 0x10ffff) { // Surrogates.
253: dest
254: .append((char) (((code - 0x10000) >> 10) + 0xd800));
255: dest
256: .append((char) (((code - 0x10000) & 0x3ff) + 0xdc00));
257: } else {
258: throw new CharConversionException(
259: "Cannot convert U+"
260: + Integer.toHexString(code)
261: + " to char (code greater than U+10FFFF)");
262: }
263: }
264: }
265: }
266:
267: // Implements Reusable.
268: public void reset() {
269: _byteBuffer = null;
270: _code = 0;
271: _moreBytes = 0;
272: }
273:
274: /**
275: * @deprecated Replaced by {@link #setInput(ByteBuffer)}
276: */
277: public UTF8ByteBufferReader setByteBuffer(ByteBuffer byteBuffer) {
278: return this.setInput(byteBuffer);
279: }
280:
281: }
|