001: /*
002: Copyright (c) 2004-2005, Dennis M. Sosnoski.
003: All rights reserved.
004:
005: Redistribution and use in source and binary forms, with or without modification,
006: are permitted provided that the following conditions are met:
007:
008: * Redistributions of source code must retain the above copyright notice, this
009: list of conditions and the following disclaimer.
010: * Redistributions in binary form must reproduce the above copyright notice,
011: this list of conditions and the following disclaimer in the documentation
012: and/or other materials provided with the distribution.
013: * Neither the name of JiBX nor the names of its contributors may be used
014: to endorse or promote products derived from this software without specific
015: prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
018: ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
019: WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
021: ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028:
029: package org.jibx.runtime.impl;
030:
031: import java.io.IOException;
032:
033: import org.jibx.runtime.IXMLWriter;
034:
035: /**
036: * Handler for marshalling text document to a UTF-8 output stream.
037: *
038: * @author Dennis M. Sosnoski
039: * @version 1.0
040: */
041: public class UTF8StreamWriter extends StreamWriterBase {
042: /** Conversion buffer for prefixes; */
043: private byte[] m_converts;
044:
045: /**
046: * Constructor.
047: *
048: * @param uris ordered array of URIs for namespaces used in document (must
049: * be constant; the value in position 0 must always be the empty string "",
050: * and the value in position 1 must always be the XML namespace
051: * "http://www.w3.org/XML/1998/namespace")
052: */
053: public UTF8StreamWriter(String[] uris) {
054: super ("UTF-8", uris);
055: defineNamespace(0, "");
056: defineNamespace(1, "xml");
057: }
058:
059: /**
060: * Copy constructor. This takes the stream from a supplied instance, while
061: * setting a new array of namespace URIs. It's intended for use when
062: * invoking one binding from within another binding.
063: *
064: * @param base instance to be used as base for writer
065: * @param uris ordered array of URIs for namespaces used in document
066: * (see {@link #UTF8StreamWriter(String[])})
067: */
068: public UTF8StreamWriter(UTF8StreamWriter base, String[] uris) {
069: super (base, uris);
070: defineNamespace(0, "");
071: defineNamespace(1, "xml");
072: }
073:
074: /**
075: * Write markup text to output. Markup text can be written directly to the
076: * output without the need for any escaping, but still needs to be properly
077: * encoded.
078: *
079: * @param text markup text to be written
080: * @throws IOException if error writing to document
081: */
082: protected void writeMarkup(String text) throws IOException {
083: int length = text.length();
084: makeSpace(length * 3);
085: int fill = m_fillOffset;
086: for (int i = 0; i < length; i++) {
087: char chr = text.charAt(i);
088: if (chr > 0x7F) {
089: if (chr > 0x7FF) {
090: m_buffer[fill++] = (byte) (0xE0 + (chr >> 12));
091: m_buffer[fill++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
092: m_buffer[fill++] = (byte) (0x80 + (chr & 0x3F));
093: } else {
094: m_buffer[fill++] = (byte) (0xC0 + (chr >> 6));
095: m_buffer[fill++] = (byte) (0x80 + (chr & 0x3F));
096: }
097: } else {
098: m_buffer[fill++] = (byte) chr;
099: }
100: }
101: m_fillOffset = fill;
102: }
103:
104: /**
105: * Write markup character to output. Markup text can be written directly to
106: * the output without the need for any escaping, but still needs to be
107: * properly encoded.
108: *
109: * @param chr markup character to be written
110: * @throws IOException if error writing to document
111: */
112: protected void writeMarkup(char chr) throws IOException {
113: makeSpace(3);
114: if (chr > 0x7F) {
115: if (chr > 0x7FF) {
116: m_buffer[m_fillOffset++] = (byte) (0xE0 + (chr >> 12));
117: m_buffer[m_fillOffset++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
118: m_buffer[m_fillOffset++] = (byte) (0x80 + (chr & 0x3F));
119: } else {
120: m_buffer[m_fillOffset++] = (byte) (0xC0 + (chr >> 6));
121: m_buffer[m_fillOffset++] = (byte) (0x80 + (chr & 0x3F));
122: }
123: } else {
124: m_buffer[m_fillOffset++] = (byte) chr;
125: }
126: }
127:
128: /**
129: * Report that namespace has been defined.
130: *
131: * @param index namespace URI index number
132: * @param prefix prefix used for namespace
133: */
134: protected void defineNamespace(int index, String prefix) {
135: int limit = prefix.length() * 3;
136: if (m_converts == null) {
137: m_converts = new byte[limit];
138: } else if (limit > m_converts.length) {
139: m_converts = new byte[limit];
140: }
141: int fill = 0;
142: for (int i = 0; i < prefix.length(); i++) {
143: char chr = prefix.charAt(i);
144: if (chr > 0x7F) {
145: if (chr > 0x7FF) {
146: m_converts[fill++] = (byte) (0xE0 + (chr >> 12));
147: m_converts[fill++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
148: m_converts[fill++] = (byte) (0x80 + (chr & 0x3F));
149: } else {
150: m_converts[fill++] = (byte) (0xC0 + (chr >> 6));
151: m_converts[fill++] = (byte) (0x80 + (chr & 0x3F));
152: }
153: } else {
154: m_converts[fill++] = (byte) chr;
155: }
156: }
157: byte[] trim;
158: if (fill > 0) {
159: trim = new byte[fill + 1];
160: System.arraycopy(m_converts, 0, trim, 0, fill);
161: trim[fill] = ':';
162: } else {
163: trim = new byte[0];
164: }
165: if (index < m_prefixBytes.length) {
166: m_prefixBytes[index] = trim;
167: } else if (m_extensionBytes != null) {
168: index -= m_prefixBytes.length;
169: for (int i = 0; i < m_extensionBytes.length; i++) {
170: int length = m_extensionBytes[i].length;
171: if (index < length) {
172: m_extensionBytes[i][index] = trim;
173: } else {
174: index -= length;
175: }
176: }
177: } else {
178: throw new IllegalArgumentException("Index out of range");
179: }
180: }
181:
182: /**
183: * Write attribute text to output. This needs to write the text with any
184: * appropriate escaping.
185: *
186: * @param text attribute value text to be written
187: * @throws IOException if error writing to document
188: */
189: protected void writeAttributeText(String text) throws IOException {
190: int length = text.length();
191: makeSpace(length * 6);
192: int fill = m_fillOffset;
193: for (int i = 0; i < length; i++) {
194: char chr = text.charAt(i);
195: if (chr == '"') {
196: fill = writeEntity(QUOT_ENTITY, fill);
197: } else if (chr == '&') {
198: fill = writeEntity(AMP_ENTITY, fill);
199: } else if (chr == '<') {
200: fill = writeEntity(LT_ENTITY, fill);
201: } else if (chr == '>' && i > 2 && text.charAt(i - 1) == ']'
202: && text.charAt(i - 2) == ']') {
203: m_buffer[fill++] = (byte) ']';
204: m_buffer[fill++] = (byte) ']';
205: fill = writeEntity(GT_ENTITY, fill);
206: } else if (chr < 0x20) {
207: if (chr != 0x9 && chr != 0xA && chr != 0xD) {
208: throw new IOException("Illegal character code 0x"
209: + Integer.toHexString(chr)
210: + " in attribute value text");
211: } else {
212: m_buffer[fill++] = (byte) chr;
213: }
214: } else {
215: if (chr > 0x7F) {
216: if (chr > 0x7FF) {
217: if (chr > 0xD7FF
218: && (chr < 0xE000 || chr == 0xFFFE
219: || chr == 0xFFFF || chr > 0x10FFFF)) {
220: throw new IOException(
221: "Illegal character code 0x"
222: + Integer.toHexString(chr)
223: + " in attribute value text");
224: } else {
225: m_buffer[fill++] = (byte) (0xE0 + (chr >> 12));
226: m_buffer[fill++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
227: m_buffer[fill++] = (byte) (0x80 + (chr & 0x3F));
228: }
229: } else {
230: m_buffer[fill++] = (byte) (0xC0 + (chr >> 6));
231: m_buffer[fill++] = (byte) (0x80 + (chr & 0x3F));
232: }
233: } else {
234: m_buffer[fill++] = (byte) chr;
235: }
236: }
237: }
238: m_fillOffset = fill;
239: }
240:
241: /**
242: * Write ordinary character data text content to document.
243: *
244: * @param text content value text
245: * @throws IOException on error writing to document
246: */
247: public void writeTextContent(String text) throws IOException {
248: flagTextContent();
249: int length = text.length();
250: makeSpace(length * 5);
251: int fill = m_fillOffset;
252: for (int i = 0; i < length; i++) {
253: char chr = text.charAt(i);
254: if (chr == '&') {
255: fill = writeEntity(AMP_ENTITY, fill);
256: } else if (chr == '<') {
257: fill = writeEntity(LT_ENTITY, fill);
258: } else if (chr == '>' && i > 2 && text.charAt(i - 1) == ']'
259: && text.charAt(i - 2) == ']') {
260: fill = writeEntity(GT_ENTITY, fill);
261: } else if (chr < 0x20) {
262: if (chr != 0x9 && chr != 0xA && chr != 0xD) {
263: throw new IOException("Illegal character code 0x"
264: + Integer.toHexString(chr)
265: + " in content text");
266: } else {
267: m_buffer[fill++] = (byte) chr;
268: }
269: } else {
270: if (chr > 0x7F) {
271: if (chr > 0x7FF) {
272: if (chr > 0xD7FF
273: && (chr < 0xE000 || chr == 0xFFFE
274: || chr == 0xFFFF || chr > 0x10FFFF)) {
275: throw new IOException(
276: "Illegal character code 0x"
277: + Integer.toHexString(chr)
278: + " in content text");
279: } else {
280: m_buffer[fill++] = (byte) (0xE0 + (chr >> 12));
281: m_buffer[fill++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
282: m_buffer[fill++] = (byte) (0x80 + (chr & 0x3F));
283: }
284: } else {
285: m_buffer[fill++] = (byte) (0xC0 + (chr >> 6));
286: m_buffer[fill++] = (byte) (0x80 + (chr & 0x3F));
287: }
288: } else {
289: m_buffer[fill++] = (byte) chr;
290: }
291: }
292: }
293: m_fillOffset = fill;
294: }
295:
296: /**
297: * Write CDATA text to document.
298: *
299: * @param text content value text
300: * @throws IOException on error writing to document
301: */
302: public void writeCData(String text) throws IOException {
303: flagTextContent();
304: int length = text.length();
305: makeSpace(length * 3 + 12);
306: int fill = m_fillOffset;
307: fill = writeEntity(LT_CDATASTART, fill);
308: for (int i = 0; i < length; i++) {
309: char chr = text.charAt(i);
310: if (chr == '>' && i > 2 && text.charAt(i - 1) == ']'
311: && text.charAt(i - 2) == ']') {
312: throw new IOException(
313: "Sequence \"]]>\" is not allowed "
314: + "within CDATA section text");
315: } else if (chr < 0x20) {
316: if (chr != 0x9 && chr != 0xA && chr != 0xD) {
317: throw new IOException("Illegal character code 0x"
318: + Integer.toHexString(chr)
319: + " in content text");
320: } else {
321: m_buffer[fill++] = (byte) chr;
322: }
323: } else {
324: if (chr > 0x7F) {
325: if (chr > 0x7FF) {
326: if (chr > 0xD7FF
327: && (chr < 0xE000 || chr == 0xFFFE
328: || chr == 0xFFFF || chr > 0x10FFFF)) {
329: throw new IOException(
330: "Illegal character code 0x"
331: + Integer.toHexString(chr)
332: + " in CDATA section text");
333: } else {
334: m_buffer[fill++] = (byte) (0xE0 + (chr >> 12));
335: m_buffer[fill++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
336: m_buffer[fill++] = (byte) (0x80 + (chr & 0x3F));
337: }
338: } else {
339: m_buffer[fill++] = (byte) (0xC0 + (chr >> 6));
340: m_buffer[fill++] = (byte) (0x80 + (chr & 0x3F));
341: }
342: } else {
343: m_buffer[fill++] = (byte) chr;
344: }
345: }
346: }
347: m_fillOffset = writeEntity(LT_CDATAEND, fill);
348: }
349:
350: /**
351: * Create a child writer instance to be used for a separate binding. The
352: * child writer inherits the stream and encoding from this writer, while
353: * using the supplied namespace URIs.
354: *
355: * @param uris ordered array of URIs for namespaces used in document
356: * (see {@link #UTF8StreamWriter(String[])})
357: * @return child writer
358: */
359: public IXMLWriter createChildWriter(String[] uris) {
360: return new UTF8StreamWriter(this, uris);
361: }
362: }
|