001: /**
002: * Sequoia: Database clustering technology.
003: * Copyright (C) 2002-2004 French National Institute For Research In Computer
004: * Science And Control (INRIA).
005: * Copyright (C) 2005 AmicoSoft, Inc. dba Emic Networks
006: * Contact: sequoia@continuent.org
007: *
008: * Licensed under the Apache License, Version 2.0 (the "License");
009: * you may not use this file except in compliance with the License.
010: * You may obtain a copy of the License at
011: *
012: * http://www.apache.org/licenses/LICENSE-2.0
013: *
014: * Unless required by applicable law or agreed to in writing, software
015: * distributed under the License is distributed on an "AS IS" BASIS,
016: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: * See the License for the specific language governing permissions and
018: * limitations under the License.
019: *
020: * Initial developer(s): Nicolas Modrzyk
021: * Contributor(s): Emmanuel Cecchet, Marc Herbert
022: */package org.continuent.sequoia.common.stream;
023:
024: import java.io.DataOutputStream;
025: import java.io.IOException;
026: import java.io.OutputStream;
027: import java.nio.ByteBuffer;
028: import java.nio.CharBuffer;
029: import java.nio.charset.CharsetEncoder;
030:
031: /**
032: * Decorates a DataOutputStream with the {@link #writeLongUTF(String)} method
033: * that allows writing of UTF strings larger than <code>65535</code> bytes
034: *
035: * @see java.io.DataOutputStream
036: * @author <a href="mailto:Nicolas.Modrzyk@inrialpes.fr">Nicolas Modrzyk </a>
037: * @author <a href="mailto:Emmanuel.Cecchet@inria.fr">Emmanuel Cecchet </a>
038: * @author <a href="mailto:Marc.Herbert@emicnetworks.com">Marc Herbert</a>
039: * @author <a href="mailto:Gilles.Rayrat@emicnetworks.com">Gilles Rayrat</a>
040: */
041: public class LongUTFDataOutputStream extends DataOutputStream {
042:
043: private final CharsetEncoder utf8enc = DriverStream.UTF8Codec
044: .newEncoder();
045:
046: /**
047: * @see DataOutputStream#DataOutputStream(java.io.OutputStream)
048: */
049: public LongUTFDataOutputStream(OutputStream out) {
050: super (out);
051: }
052:
053: /**
054: * Sends UTF strings larger than <code>65535</code> bytes (encoded), chunk
055: * by chunk. Historically the purpose of these functions was to work around
056: * the limitation of {@link java.io.DataInputStream#readUTF()}, but now we
057: * use real UTF8, and no more modified UTF8
058: * http://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8. Chunking is still useful
059: * to avoid handling big strings all at once and being a memory hog.
060: *
061: * @see java.io.DataOutputStream#writeUTF(java.lang.String)
062: * @param string a String to write in UTF form to the stream
063: * @throws IOException if an error occurs
064: */
065: public void writeLongUTF(String string) throws IOException {
066: if (null == string) {
067: super .writeBoolean(false);
068: return;
069: }
070:
071: super .writeBoolean(true);
072: int idx;
073: final int maxSize = DriverStream.STRING_CHUNK_SIZE;
074:
075: this .writeInt(string.length());
076:
077: // First send all full, maxSize long chunks
078: for (idx = 0; idx + maxSize <= string.length(); idx += maxSize)
079: // substring() does no copy, cool.
080: writeUTF8(string.substring(idx, idx + maxSize));
081:
082: // Send the tail separately because
083: // - string.substring(begin, TOO_LONG) is unfortunately not legal.
084: // - we do not send any empty string, this is useless and would complexify
085: // the receiver.
086: // The tail is in most (short) cases just the string as is.
087:
088: if (string.length() > idx)
089: writeUTF8(string.substring(idx));
090: }
091:
092: /**
093: * Sending real UTF-8, not the modified one.
094: *
095: * @throws IOException
096: * @see org.continuent.sequoia.common.protocol.SQLDataSerialization.BytesSerializer
097: */
098: void writeUTF8(String s) throws IOException {
099: if (false) // old code (modified UTF-8). See SEQUOIA-133
100: super .writeUTF(s);
101: else { // new code, real UTF8
102: CharBuffer cb = CharBuffer.wrap(s); // no copy; good.
103: ByteBuffer bb = utf8enc.encode(cb);
104: super .writeInt(bb.remaining());
105: super .write(bb.array(), 0, bb.remaining()); // no copy either
106: }
107: }
108: }
|