001: /*
002: * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
003: * Copyright (C) 2006 - Javolution (http://javolution.org/)
004: * All rights reserved.
005: *
006: * Permission to use, copy, modify, and distribute this software is
007: * freely granted, provided that this notice is preserved.
008: */
009: package javolution.io;
010:
011: import j2me.lang.CharSequence;
012: import j2me.lang.IllegalStateException;
013: import j2me.io.CharConversionException;
014: import java.io.IOException;
015: import java.io.OutputStream;
016: import java.io.Writer;
017:
018: import javolution.lang.Reusable;
019:
020: /**
021: * <p> This class represents a UTF-8 stream writer.</p>
022: *
023: * <p> This writer supports surrogate <code>char</code> pairs (representing
024: * characters in the range [U+10000 .. U+10FFFF]). It can also be used
025: * to write characters from their unicodes (31 bits) directly
026: * (ref. {@link #write(int)}).</p>
027: *
028: * <p> Instances of this class can be reused for different output streams
029: * and can be part of a higher level component (e.g. serializer) in order
030: * to avoid dynamic buffer allocation when the destination output changes.
031: * Also wrapping using a <code>java.io.BufferedWriter</code> is unnescessary
032: * as instances of this class embed their own data buffers.</p>
033: *
034: * <p> Note: This writer is unsynchronized and always produces well-formed
035: * UTF-8 sequences.</p>
036: *
037: * @author <a href="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
038: * @version 2.0, December 9, 2004
039: */
040: public final class UTF8StreamWriter extends Writer implements Reusable {
041:
042: /**
043: * Holds the current output stream or <code>null</code> if closed.
044: */
045: private OutputStream _outputStream;
046:
047: /**
048: * Holds the bytes' buffer.
049: */
050: private final byte[] _bytes;
051:
052: /**
053: * Holds the bytes buffer index.
054: */
055: private int _index;
056:
057: /**
058: * Creates a UTF-8 writer having a byte buffer of moderate capacity (2048).
059: */
060: public UTF8StreamWriter() {
061: _bytes = new byte[2048];
062: }
063:
064: /**
065: * Creates a UTF-8 writer having a byte buffer of specified capacity.
066: *
067: * @param capacity the capacity of the byte buffer.
068: */
069: public UTF8StreamWriter(int capacity) {
070: _bytes = new byte[capacity];
071: }
072:
073: /**
074: * Sets the output stream to use for writing until this writer is closed.
075: * For example:[code]
076: * Writer writer = new UTF8StreamWriter().setOutputStream(out);
077: * [/code] is equivalent but writes faster than [code]
078: * Writer writer = new j2me.io.OutputStreamWriter(out, "UTF-8");
079: * [/code]
080: *
081: * @param out the output stream.
082: * @return this UTF-8 writer.
083: * @throws IllegalStateException if this writer is being reused and
084: * it has not been {@link #close closed} or {@link #reset reset}.
085: */
086: public UTF8StreamWriter setOutput(OutputStream out) {
087: if (_outputStream != null)
088: throw new IllegalStateException(
089: "Writer not closed or reset");
090: _outputStream = out;
091: return this ;
092: }
093:
094: /**
095: * Writes a single character. This method supports 16-bits
096: * character surrogates.
097: *
098: * @param c <code>char</code> the character to be written (possibly
099: * a surrogate).
100: * @throws IOException if an I/O error occurs.
101: */
102: public void write(char c) throws IOException {
103: if ((c < 0xd800) || (c > 0xdfff)) {
104: write((int) c);
105: } else if (c < 0xdc00) { // High surrogate.
106: _highSurrogate = c;
107: } else { // Low surrogate.
108: int code = ((_highSurrogate - 0xd800) << 10) + (c - 0xdc00)
109: + 0x10000;
110: write(code);
111: }
112: }
113:
114: private char _highSurrogate;
115:
116: /**
117: * Writes a character given its 31-bits Unicode.
118: *
119: * @param code the 31 bits Unicode of the character to be written.
120: * @throws IOException if an I/O error occurs.
121: */
122: public void write(int code) throws IOException {
123: if ((code & 0xffffff80) == 0) {
124: _bytes[_index] = (byte) code;
125: if (++_index >= _bytes.length) {
126: flushBuffer();
127: }
128: } else { // Writes more than one byte.
129: write2(code);
130: }
131: }
132:
133: private void write2(int c) throws IOException {
134: if ((c & 0xfffff800) == 0) { // 2 bytes.
135: _bytes[_index] = (byte) (0xc0 | (c >> 6));
136: if (++_index >= _bytes.length) {
137: flushBuffer();
138: }
139: _bytes[_index] = (byte) (0x80 | (c & 0x3f));
140: if (++_index >= _bytes.length) {
141: flushBuffer();
142: }
143: } else if ((c & 0xffff0000) == 0) { // 3 bytes.
144: _bytes[_index] = (byte) (0xe0 | (c >> 12));
145: if (++_index >= _bytes.length) {
146: flushBuffer();
147: }
148: _bytes[_index] = (byte) (0x80 | ((c >> 6) & 0x3f));
149: if (++_index >= _bytes.length) {
150: flushBuffer();
151: }
152: _bytes[_index] = (byte) (0x80 | (c & 0x3f));
153: if (++_index >= _bytes.length) {
154: flushBuffer();
155: }
156: } else if ((c & 0xff200000) == 0) { // 4 bytes.
157: _bytes[_index] = (byte) (0xf0 | (c >> 18));
158: if (++_index >= _bytes.length) {
159: flushBuffer();
160: }
161: _bytes[_index] = (byte) (0x80 | ((c >> 12) & 0x3f));
162: if (++_index >= _bytes.length) {
163: flushBuffer();
164: }
165: _bytes[_index] = (byte) (0x80 | ((c >> 6) & 0x3f));
166: if (++_index >= _bytes.length) {
167: flushBuffer();
168: }
169: _bytes[_index] = (byte) (0x80 | (c & 0x3f));
170: if (++_index >= _bytes.length) {
171: flushBuffer();
172: }
173: } else if ((c & 0xf4000000) == 0) { // 5 bytes.
174: _bytes[_index] = (byte) (0xf8 | (c >> 24));
175: if (++_index >= _bytes.length) {
176: flushBuffer();
177: }
178: _bytes[_index] = (byte) (0x80 | ((c >> 18) & 0x3f));
179: if (++_index >= _bytes.length) {
180: flushBuffer();
181: }
182: _bytes[_index] = (byte) (0x80 | ((c >> 12) & 0x3f));
183: if (++_index >= _bytes.length) {
184: flushBuffer();
185: }
186: _bytes[_index] = (byte) (0x80 | ((c >> 6) & 0x3f));
187: if (++_index >= _bytes.length) {
188: flushBuffer();
189: }
190: _bytes[_index] = (byte) (0x80 | (c & 0x3f));
191: if (++_index >= _bytes.length) {
192: flushBuffer();
193: }
194: } else if ((c & 0x80000000) == 0) { // 6 bytes.
195: _bytes[_index] = (byte) (0xfc | (c >> 30));
196: if (++_index >= _bytes.length) {
197: flushBuffer();
198: }
199: _bytes[_index] = (byte) (0x80 | ((c >> 24) & 0x3f));
200: if (++_index >= _bytes.length) {
201: flushBuffer();
202: }
203: _bytes[_index] = (byte) (0x80 | ((c >> 18) & 0x3f));
204: if (++_index >= _bytes.length) {
205: flushBuffer();
206: }
207: _bytes[_index] = (byte) (0x80 | ((c >> 12) & 0x3F));
208: if (++_index >= _bytes.length) {
209: flushBuffer();
210: }
211: _bytes[_index] = (byte) (0x80 | ((c >> 6) & 0x3F));
212: if (++_index >= _bytes.length) {
213: flushBuffer();
214: }
215: _bytes[_index] = (byte) (0x80 | (c & 0x3F));
216: if (++_index >= _bytes.length) {
217: flushBuffer();
218: }
219: } else {
220: throw new CharConversionException("Illegal character U+"
221: + Integer.toHexString(c));
222: }
223: }
224:
225: /**
226: * Writes a portion of an array of characters.
227: *
228: * @param cbuf the array of characters.
229: * @param off the offset from which to start writing characters.
230: * @param len the number of characters to write.
231: * @throws IOException if an I/O error occurs.
232: */
233: public void write(char cbuf[], int off, int len) throws IOException {
234: final int off_plus_len = off + len;
235: for (int i = off; i < off_plus_len;) {
236: char c = cbuf[i++];
237: if (c < 0x80) {
238: _bytes[_index] = (byte) c;
239: if (++_index >= _bytes.length) {
240: flushBuffer();
241: }
242: } else {
243: write(c);
244: }
245: }
246: }
247:
248: /**
249: * Writes a portion of a string.
250: *
251: * @param str a String.
252: * @param off the offset from which to start writing characters.
253: * @param len the number of characters to write.
254: * @throws IOException if an I/O error occurs
255: */
256: public void write(String str, int off, int len) throws IOException {
257: final int off_plus_len = off + len;
258: for (int i = off; i < off_plus_len;) {
259: char c = str.charAt(i++);
260: if (c < 0x80) {
261: _bytes[_index] = (byte) c;
262: if (++_index >= _bytes.length) {
263: flushBuffer();
264: }
265: } else {
266: write(c);
267: }
268: }
269: }
270:
271: /**
272: * Writes the specified character sequence.
273: *
274: * @param csq the character sequence.
275: * @throws IOException if an I/O error occurs
276: */
277: public void write(CharSequence csq) throws IOException {
278: final int length = csq.length();
279: for (int i = 0; i < length;) {
280: char c = csq.charAt(i++);
281: if (c < 0x80) {
282: _bytes[_index] = (byte) c;
283: if (++_index >= _bytes.length) {
284: flushBuffer();
285: }
286: } else {
287: write(c);
288: }
289: }
290: }
291:
292: /**
293: * Flushes the stream. If the stream has saved any characters from the
294: * various write() methods in a buffer, write them immediately to their
295: * intended destination. Then, if that destination is another character or
296: * byte stream, flush it. Thus one flush() invocation will flush all the
297: * buffers in a chain of Writers and OutputStreams.
298: *
299: * @throws IOException if an I/O error occurs.
300: */
301: public void flush() throws IOException {
302: flushBuffer();
303: _outputStream.flush();
304: }
305:
306: /**
307: * Closes and {@link #reset resets} this writer for reuse.
308: *
309: * @throws IOException if an I/O error occurs
310: */
311: public void close() throws IOException {
312: if (_outputStream != null) {
313: flushBuffer();
314: _outputStream.close();
315: reset();
316: }
317: }
318:
319: /**
320: * Flushes the internal bytes buffer.
321: *
322: * @throws IOException if an I/O error occurs
323: */
324: private void flushBuffer() throws IOException {
325: if (_outputStream == null)
326: throw new IOException("Stream closed");
327: _outputStream.write(_bytes, 0, _index);
328: _index = 0;
329: }
330:
331: // Implements Reusable.
332: public void reset() {
333: _highSurrogate = 0;
334: _index = 0;
335: _outputStream = null;
336: }
337:
338: /**
339: * @deprecated Replaced by {@link #setOutput(OutputStream)}
340: */
341: public UTF8StreamWriter setOutputStream(OutputStream out) {
342: return this.setOutput(out);
343: }
344: }
|