001: /*
002: * $Id: UTF8OutputStreamWriter.java,v 1.3 2007/02/23 20:16:37 joehw Exp $
003: */
004:
005: /*
006: * The contents of this file are subject to the terms
007: * of the Common Development and Distribution License
008: * (the License). You may not use this file except in
009: * compliance with the License.
010: *
011: * You can obtain a copy of the license at
012: * https://glassfish.dev.java.net/public/CDDLv1.0.html.
013: * See the License for the specific language governing
014: * permissions and limitations under the License.
015: *
016: * When distributing Covered Code, include this CDDL
017: * Header Notice in each file and include the License file
018: * at https://glassfish.dev.java.net/public/CDDLv1.0.html.
019: * If applicable, add the following below the CDDL Header,
020: * with the fields enclosed by brackets [] replaced by
021: * you own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * [Name of File] [ver.__] [Date]
025: *
026: * Copyright 2006 Sun Microsystems Inc. All Rights Reserved
027: */
028:
029: package com.sun.xml.stream.writers;
030:
031: import java.io.Writer;
032: import java.io.OutputStream;
033: import java.io.IOException;
034:
035: import com.sun.xml.stream.xerces.util.XMLChar;
036:
/**
 * <p>A {@link Writer} that encodes a stream of chars (UTF-16 code units)
 * as a stream of UTF-8 bytes and hands every byte straight to the
 * underlying output stream. It assumes that the underlying output stream
 * is buffered or does not need additional buffering.</p>
 *
 * <p>It is more efficient than using a <code>java.io.OutputStreamWriter</code>
 * because it does not need to be wrapped in a
 * <code>java.io.BufferedWriter</code>. Creating multiple instances
 * of <code>java.io.BufferedWriter</code> has been shown to be very
 * expensive in JAX-WS.</p>
 *
 * <p>Supplementary characters must be written as a high surrogate
 * immediately followed by a low surrogate; the pair may be split across
 * separate <code>write</code> calls. An unpaired surrogate is rejected
 * with an {@link IOException} instead of being encoded as malformed
 * UTF-8. This class performs no synchronization of its own.</p>
 *
 * @author Santiago.PericasGeertsen@sun.com
 */
public final class UTF8OutputStreamWriter extends Writer {

    /**
     * Underlying output stream. This class assumes that this
     * output stream does not need buffering.
     */
    OutputStream out;

    /**
     * Java represents chars that are not in the Basic Multilingual
     * Plane (BMP) in UTF-16. This int stores the first code unit
     * (the high surrogate) of a code point encoded in two UTF-16 code
     * units while the second unit has not been written yet. It is
     * <code>0</code> when no high surrogate is pending.
     */
    int lastUTF16CodePoint = 0;

    /**
     * Creates a writer that sends UTF-8 bytes to the given stream.
     *
     * @param out the stream receiving the encoded bytes
     */
    public UTF8OutputStreamWriter(OutputStream out) {
        this.out = out;
    }

    /**
     * Returns the name of the encoding produced by this writer.
     *
     * @return always <code>"UTF-8"</code>
     */
    public String getEncoding() {
        return "UTF-8";
    }

    /**
     * Writes a single UTF-16 code unit.
     *
     * <p>As specified by {@link Writer#write(int)}, only the 16 low-order
     * bits of <code>c</code> are used. A high surrogate is held back until
     * the matching low surrogate arrives, at which point the complete
     * code point is emitted as four bytes.</p>
     *
     * @param c the char to write, in the 16 low-order bits
     * @throws IOException if the underlying stream fails, if a pending
     *         high surrogate is followed by anything but a low surrogate,
     *         or if a low surrogate arrives without a preceding high
     *         surrogate; in the latter two cases nothing is written and
     *         any pending surrogate is discarded
     */
    @Override
    public void write(int c) throws IOException {
        // The int-to-char cast keeps exactly the 16 low-order bits.
        final char unit = (char) c;

        // Check if we are in the middle of a high/low surrogate pair
        if (lastUTF16CodePoint != 0) {
            final int high = lastUTF16CodePoint;
            // Clear the pending state first so that a failure below does
            // not leave the writer stuck waiting for a low surrogate.
            lastUTF16CodePoint = 0;

            if (!Character.isLowSurrogate(unit)) {
                throw new IOException(
                        "Attempting to write invalid UTF-16 sequence:"
                        + " high surrogate 0x" + Integer.toHexString(high)
                        + " followed by 0x" + Integer.toHexString(unit));
            }

            // 4 bytes, 21 bits
            final int uc = Character.toCodePoint((char) high, unit);
            out.write(0xF0 | (uc >> 18));
            out.write(0x80 | ((uc >> 12) & 0x3F));
            out.write(0x80 | ((uc >> 6) & 0x3F));
            out.write(0x80 | (uc & 0x3F));
            return;
        }

        // Otherwise, encode char as defined in UTF-8
        if (unit < 0x80) {
            // 1 byte, 7 bits
            out.write(unit);
        } else if (unit < 0x800) {
            // 2 bytes, 11 bits
            out.write(0xC0 | (unit >> 6));          // first 5
            out.write(0x80 | (unit & 0x3F));        // second 6
        } else if (Character.isHighSurrogate(unit)) {
            // First half of a supplementary character: wait for the rest
            lastUTF16CodePoint = unit;
        } else if (Character.isLowSurrogate(unit)) {
            throw new IOException(
                    "Attempting to write invalid UTF-16 sequence:"
                    + " low surrogate 0x" + Integer.toHexString(unit)
                    + " without a preceding high surrogate");
        } else {
            // 3 bytes, 16 bits
            out.write(0xE0 | (unit >> 12));         // first 4
            out.write(0x80 | ((unit >> 6) & 0x3F)); // second 6
            out.write(0x80 | (unit & 0x3F));        // third 6
        }
    }

    /**
     * Writes every char of the given array.
     *
     * @param cbuf the chars to write
     * @throws IOException see {@link #write(int)}
     */
    @Override
    public void write(char cbuf[]) throws IOException {
        write(cbuf, 0, cbuf.length);
    }

    /**
     * Writes <code>len</code> chars of the array starting at <code>off</code>.
     *
     * @param cbuf the array holding the chars
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IndexOutOfBoundsException if the range does not lie within
     *         the array; checked before any byte is written
     * @throws IOException see {@link #write(int)}
     */
    @Override
    public void write(char cbuf[], int off, int len) throws IOException {
        checkRange(cbuf.length, off, len);
        final int end = off + len;
        for (int i = off; i < end; i++) {
            write(cbuf[i]);
        }
    }

    /**
     * Writes every char of the given string.
     *
     * @param str the string to write
     * @throws IOException see {@link #write(int)}
     */
    @Override
    public void write(String str) throws IOException {
        write(str, 0, str.length());
    }

    /**
     * Writes <code>len</code> chars of the string starting at <code>off</code>.
     *
     * @param str the string holding the chars
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IndexOutOfBoundsException if the range does not lie within
     *         the string; checked before any byte is written
     * @throws IOException see {@link #write(int)}
     */
    @Override
    public void write(String str, int off, int len) throws IOException {
        checkRange(str.length(), off, len);
        final int end = off + len;
        for (int i = off; i < end; i++) {
            write(str.charAt(i));
        }
    }

    /**
     * Flushes the underlying output stream. A pending high surrogate
     * stays pending; it cannot be encoded on its own.
     *
     * @throws IOException if the underlying stream fails to flush
     */
    @Override
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Closes the underlying output stream.
     *
     * <p>The stream is closed before a dangling high surrogate is
     * reported, so the underlying resource is released even when this
     * method fails with an {@link IllegalStateException}.</p>
     *
     * @throws IOException if the underlying stream fails to close
     * @throws IllegalStateException if a high surrogate was written
     *         without its low surrogate
     */
    @Override
    public void close() throws IOException {
        final boolean awaitingLowSurrogate = lastUTF16CodePoint != 0;
        lastUTF16CodePoint = 0;

        out.close();

        if (awaitingLowSurrogate) {
            throw new IllegalStateException(
                    "Attempting to close a UTF8OutputStreamWriter"
                    + " while awaiting for a UTF-16 code unit");
        }
    }

    /** Rejects an (off, len) range that does not fit in a sequence of the given size. */
    private static void checkRange(int size, int off, int len) {
        // "len > size - off" cannot overflow, unlike "off + len > size".
        if (off < 0 || len < 0 || len > size - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", size=" + size);
        }
    }

}
|