001: //========================================================================
002: //Copyright 2006 Mort Bay Consulting Pty. Ltd.
003: //------------------------------------------------------------------------
004: //Licensed under the Apache License, Version 2.0 (the "License");
005: //you may not use this file except in compliance with the License.
006: //You may obtain a copy of the License at
007: //http://www.apache.org/licenses/LICENSE-2.0
008: //Unless required by applicable law or agreed to in writing, software
009: //distributed under the License is distributed on an "AS IS" BASIS,
010: //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011: //See the License for the specific language governing permissions and
012: //limitations under the License.
013: //========================================================================
014:
015: package org.mortbay.util;
016:
/* ------------------------------------------------------------ */
/** UTF-8 StringBuffer.
 *
 * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
 * UTF-8 encoded bytes, which are converted into characters.
 *
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before
 * a character is appended to the string buffer.
 *
 * Malformed input never throws. Each malformed, overlong or unrepresentable sequence is
 * replaced by a single <code>'?'</code> and remembered, so that {@link #isError()} reports it.
 * Code points above U+FFFF are appended as a UTF-16 surrogate pair (two chars).
 *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 *
 */
public class Utf8StringBuffer {
    /** Character substituted for every byte sequence that cannot be decoded. */
    private static final char REPLACEMENT = '?';

    /** Largest valid Unicode code point. */
    private static final int MAX_CODE_POINT = 0x10ffff;

    /** First code point that needs a surrogate pair in UTF-16. */
    private static final int MIN_SUPPLEMENTARY = 0x10000;

    /** Decoded characters. */
    StringBuffer _buffer;

    /** Number of continuation bytes still expected for the sequence in progress. */
    int _more;

    /** Payload bits accumulated so far for the sequence in progress. */
    int _bits;

    /** Set once any malformed sequence has been replaced; cleared by {@link #reset()}. */
    boolean _errors;

    /** Smallest code point the sequence in progress may legally encode (overlong check). */
    private int _min;

    /** Creates a decoder backed by a default-sized {@link StringBuffer}. */
    Utf8StringBuffer() {
        _buffer = new StringBuffer();
    }

    /**
     * Creates a decoder backed by a {@link StringBuffer} of the given initial capacity.
     *
     * @param capacity initial capacity passed to {@link StringBuffer#StringBuffer(int)}
     */
    Utf8StringBuffer(int capacity) {
        _buffer = new StringBuffer(capacity);
    }

    /* ------------------------------------------------------------ */
    /**
     * Decodes <code>length</code> bytes of <code>b</code> starting at <code>offset</code>.
     * A multi-byte sequence may be split across successive calls.
     *
     * @param b      array holding UTF-8 encoded bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     */
    public void append(byte[] b, int offset, int length) {
        int end = offset + length;
        for (int i = offset; i < end; i++) {
            append(b[i]);
        }
    }

    /* ------------------------------------------------------------ */
    /**
     * Decodes a single UTF-8 byte.
     *
     * A character is appended to the buffer only when the byte completes a sequence.
     * If the byte is not valid at this position, one <code>'?'</code> is appended and the
     * error flag is set. A byte that cuts a sequence short is not thrown away: after the
     * truncated sequence has been replaced, the byte is decoded as the start of a new one.
     *
     * @param b the next byte of the UTF-8 stream
     */
    public void append(byte b) {
        boolean continuation = (b & 0xc0) == 0x80; // 10xxxxxx

        if (_more > 0) {
            if (continuation) {
                _bits = (_bits << 6) | (b & 0x3f);
                if (--_more == 0) {
                    int codePoint = _bits;
                    _bits = 0;
                    appendCodePoint(codePoint);
                }
                return;
            }
            // The sequence in progress was cut short. Replace it, then carry on
            // below so that this byte is decoded in its own right.
            malformed();
        }

        if (b >= 0) {
            // 0xxxxxxx: plain ASCII, including NUL
            _buffer.append((char) b);
        } else if (continuation) {
            // 10xxxxxx with no sequence in progress
            malformed();
        } else if ((b & 0xe0) == 0xc0) {
            // 110xxxxx
            begin(1, b & 0x1f, 0x80);
        } else if ((b & 0xf0) == 0xe0) {
            // 1110xxxx
            begin(2, b & 0x0f, 0x800);
        } else if ((b & 0xf8) == 0xf0) {
            // 11110xxx
            begin(3, b & 0x07, MIN_SUPPLEMENTARY);
        } else if ((b & 0xfc) == 0xf8) {
            // 111110xx: obsolete 5-byte form. It is consumed as one unit so that it
            // yields a single replacement; the value always exceeds MAX_CODE_POINT
            // or is overlong.
            begin(4, b & 0x03, 0x200000);
        } else if ((b & 0xfe) == 0xfc) {
            // 1111110x: obsolete 6-byte form, handled like the 5-byte form
            begin(5, b & 0x01, 0x4000000);
        } else {
            // 0xFE and 0xFF never occur in UTF-8
            malformed();
        }
    }

    /** Records the state for a new multi-byte sequence introduced by a lead byte. */
    private void begin(int more, int bits, int min) {
        _more = more;
        _bits = bits;
        _min = min;
    }

    /** Appends the replacement character, drops any partial sequence and flags the error. */
    private void malformed() {
        _buffer.append(REPLACEMENT);
        _more = 0;
        _bits = 0;
        _errors = true;
    }

    /** Appends a fully decoded code point, rejecting overlong and out-of-range values. */
    private void appendCodePoint(int codePoint) {
        if (codePoint < _min || codePoint > MAX_CODE_POINT) {
            malformed();
        } else if (codePoint < MIN_SUPPLEMENTARY) {
            _buffer.append((char) codePoint);
        } else {
            // Outside the BMP: split into a high/low surrogate pair.
            int offset = codePoint - MIN_SUPPLEMENTARY;
            _buffer.append((char) (0xd800 | (offset >> 10)));
            _buffer.append((char) (0xdc00 | (offset & 0x3ff)));
        }
    }

    /* ------------------------------------------------------------ */
    /**
     * @return the number of chars decoded so far; bytes of an unfinished sequence are not counted.
     */
    public int length() {
        return _buffer.length();
    }

    /* ------------------------------------------------------------ */
    /**
     * Empties the buffer and clears both the partial-sequence state and the error flag.
     */
    public void reset() {
        _buffer.setLength(0);
        _more = 0;
        _bits = 0;
        _min = 0;
        _errors = false;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return the underlying buffer itself (not a copy).
     */
    public StringBuffer getStringBuffer() {
        return _buffer;
    }

    /* ------------------------------------------------------------ */
    /**
     * @return the characters decoded so far as a String.
     */
    public String toString() {
        return _buffer.toString();
    }

    /* ------------------------------------------------------------ */
    /**
     * @return True if there are non UTF-8 characters or incomplete UTF-8 characters in the buffer.
     */
    public boolean isError() {
        return _errors || _more > 0;
    }
}
|