001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common Development
008: * and Distribution License("CDDL") (collectively, the "License"). You
009: * may not use this file except in compliance with the License. You can obtain
010: * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
011: * or glassfish/bootstrap/legal/LICENSE.txt. See the License for the specific
012: * language governing permissions and limitations under the License.
013: *
014: * When distributing the software, include this License Header Notice in each
015: * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
016: * Sun designates this particular file as subject to the "Classpath" exception
017: * as provided by Sun in the GPL Version 2 section of the License file that
018: * accompanied this code. If applicable, add the following below the License
019: * Header, with the fields enclosed by brackets [] replaced by your own
020: * identifying information: "Portions Copyrighted [year]
021: * [name of copyright owner]"
022: *
023: * Contributor(s):
024: *
025: * If you wish your version of this file to be governed by only the CDDL or
026: * only the GPL Version 2, indicate your decision by adding "[Contributor]
027: * elects to include this software in this distribution under the [CDDL or GPL
028: * Version 2] license." If you don't indicate a single choice of license, a
029: * recipient has the option to distribute your version of this file under
030: * either the CDDL, the GPL Version 2 or to extend the choice of license to
031: * its licensees as provided above. However, if you add GPL Version 2 code
032: * and therefore, elected the GPL Version 2 license, then the option applies
033: * only if the new code is made subject to such option by the copyright
034: * holder.
035: */
036:
037: package com.sun.xml.bind.v2.runtime.output;
038:
039: import java.io.IOException;
040:
041: /**
042: * Buffer for UTF-8 encoded string.
043: *
044: * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding.
045: *
046: * @author Kohsuke Kawaguchi
047: */
048: public final class Encoded {
049: public byte[] buf;
050:
051: public int len;
052:
053: public Encoded() {
054: }
055:
056: public Encoded(String text) {
057: set(text);
058: }
059:
060: public void ensureSize(int size) {
061: if (buf == null || buf.length < size)
062: buf = new byte[size];
063: }
064:
065: public final void set(String text) {
066: int length = text.length();
067:
068: ensureSize(length * 3 + 1); // +1 for append
069:
070: int ptr = 0;
071:
072: for (int i = 0; i < length; i++) {
073: final char chr = text.charAt(i);
074: if (chr > 0x7F) {
075: if (chr > 0x7FF) {
076: if (Character.MIN_HIGH_SURROGATE <= chr
077: && chr <= Character.MAX_LOW_SURROGATE) {
078: // surrogate
079: int uc = (((chr & 0x3ff) << 10) | (text
080: .charAt(++i) & 0x3ff)) + 0x10000;
081:
082: buf[ptr++] = (byte) (0xF0 | ((uc >> 18)));
083: buf[ptr++] = (byte) (0x80 | ((uc >> 12) & 0x3F));
084: buf[ptr++] = (byte) (0x80 | ((uc >> 6) & 0x3F));
085: buf[ptr++] = (byte) (0x80 + (uc & 0x3F));
086: continue;
087: }
088: buf[ptr++] = (byte) (0xE0 + (chr >> 12));
089: buf[ptr++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
090: } else {
091: buf[ptr++] = (byte) (0xC0 + (chr >> 6));
092: }
093: buf[ptr++] = (byte) (0x80 + (chr & 0x3F));
094: } else {
095: buf[ptr++] = (byte) chr;
096: }
097: }
098:
099: len = ptr;
100: }
101:
102: /**
103: * Fill in the buffer by encoding the specified characters
104: * while escaping characters like <
105: *
106: * @param isAttribute
107: * if true, characters like \t, \r, and \n are also escaped.
108: */
109: public final void setEscape(String text, boolean isAttribute) {
110: int length = text.length();
111: ensureSize(length * 6 + 1); // in the worst case the text is like """""", so we need 6 bytes per char
112:
113: int ptr = 0;
114:
115: for (int i = 0; i < length; i++) {
116: final char chr = text.charAt(i);
117:
118: int ptr1 = ptr;
119: if (chr > 0x7F) {
120: if (chr > 0x7FF) {
121: if (Character.MIN_HIGH_SURROGATE <= chr
122: && chr <= Character.MAX_LOW_SURROGATE) {
123: // surrogate
124: int uc = (((chr & 0x3ff) << 10) | (text
125: .charAt(++i) & 0x3ff)) + 0x10000;
126:
127: buf[ptr++] = (byte) (0xF0 | ((uc >> 18)));
128: buf[ptr++] = (byte) (0x80 | ((uc >> 12) & 0x3F));
129: buf[ptr++] = (byte) (0x80 | ((uc >> 6) & 0x3F));
130: buf[ptr++] = (byte) (0x80 + (uc & 0x3F));
131: continue;
132: }
133: buf[ptr1++] = (byte) (0xE0 + (chr >> 12));
134: buf[ptr1++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
135: } else {
136: buf[ptr1++] = (byte) (0xC0 + (chr >> 6));
137: }
138: buf[ptr1++] = (byte) (0x80 + (chr & 0x3F));
139: } else {
140: byte[] ent;
141:
142: if ((ent = attributeEntities[chr]) != null) {
143: // the majority of the case is just printed as a char,
144: // so it's very important to reject them as quickly as possible
145:
146: // check again to see if this really needs to be escaped
147: if (isAttribute || entities[chr] != null)
148: ptr1 = writeEntity(ent, ptr1);
149: else
150: buf[ptr1++] = (byte) chr;
151: } else
152: buf[ptr1++] = (byte) chr;
153: }
154: ptr = ptr1;
155: }
156: len = ptr;
157: }
158:
159: private int writeEntity(byte[] entity, int ptr) {
160: System.arraycopy(entity, 0, buf, ptr, entity.length);
161: return ptr + entity.length;
162: }
163:
164: /**
165: * Writes the encoded bytes to the given output stream.
166: */
167: public final void write(UTF8XmlOutput out) throws IOException {
168: out.write(buf, 0, len);
169: }
170:
171: /**
172: * Appends a new character to the end of the buffer.
173: * This assumes that you have enough space in the buffer.
174: */
175: public void append(char b) {
176: buf[len++] = (byte) b;
177: }
178:
179: /**
180: * Reallocate the buffer to the exact size of the data
181: * to reduce the memory footprint.
182: */
183: public void compact() {
184: byte[] b = new byte[len];
185: System.arraycopy(buf, 0, b, 0, len);
186: buf = b;
187: }
188:
189: /**
190: * UTF-8 encoded entities keyed by their character code.
191: * e.g., entities['&'] == AMP_ENTITY.
192: *
193: * In attributes we need to encode more characters.
194: */
195: private static final byte[][] entities = new byte[0x80][];
196: private static final byte[][] attributeEntities = new byte[0x80][];
197:
198: static {
199: add('&', "&", false);
200: add('<', "<", false);
201: add('>', ">", false);
202: add('"', """, false);
203: add('\t', "	", true);
204: add('\r', "
", false);
205: add('\n', "
", true);
206: }
207:
208: private static void add(char c, String s, boolean attOnly) {
209: byte[] image = UTF8XmlOutput.toBytes(s);
210: attributeEntities[c] = image;
211: if (!attOnly)
212: entities[c] = image;
213: }
214: }
|