001: package com.jclark.xml.output;
002:
003: import java.io.Writer;
004: import java.io.IOException;
005: import java.io.CharConversionException;
006: import java.io.OutputStream;
007:
008: /**
009: * An XMLWriter that encodes characters in UTF-8.
010: * Methods are not synchronized: wrap this in a SyncXMLWriter
011: * if you need to use this concurrently from multiple threads.
012: * @version $Revision: 1.7 $ $Date: 1998/06/28 09:12:04 $
013: */
014:
015: public class UTF8XMLWriter extends XMLWriter {
016: private OutputStream out;
017: private boolean inStartTag = false;
018: private boolean quoteWhitespace = false;
019: private static final int DEFAULT_BUF_LENGTH = 4 * 1024;
020: private byte[] buf = new byte[DEFAULT_BUF_LENGTH];
021: private int bufUsed = 0;
022: private String sysLineSeparator;
023: private String lineSeparator;
024: private boolean minimizeEmptyElements;
025: private boolean minimizeEmptyElementsHtml;
026:
027: static final public int MINIMIZE_EMPTY_ELEMENTS = 1;
028: static final public int MINIMIZE_EMPTY_ELEMENTS_HTML = 2;
029: static final private int DEFAULT_OPTIONS = 0;
030:
031: /**
032: * Create an XML writer that will write in UTF-8 to the specified
033: * OutputStream with the specified options.
034: */
035: public UTF8XMLWriter(OutputStream out, int options) {
036: super (out);
037: this .out = out;
038: if ((options & MINIMIZE_EMPTY_ELEMENTS_HTML) != 0)
039: this .minimizeEmptyElements = this .minimizeEmptyElementsHtml = true;
040: else if ((options & MINIMIZE_EMPTY_ELEMENTS) != 0)
041: this .minimizeEmptyElements = true;
042: sysLineSeparator = lineSeparator = System
043: .getProperty("line.separator");
044: }
045:
046: /**
047: * Create an XML writer that will write in UTF-8 to the specified
048: * OutputStream with the default options.
049: */
050: public UTF8XMLWriter(OutputStream out) {
051: this (out, DEFAULT_OPTIONS);
052: }
053:
054: public void writeUTF8(byte[] buf, int off, int len)
055: throws IOException {
056: if (inStartTag)
057: finishStartTag();
058: while (--len >= 0) {
059: byte b = buf[off++];
060: switch (b) {
061: case (byte) '\n':
062: writeRaw(lineSeparator);
063: break;
064: case (byte) '&':
065: writeRaw("&");
066: break;
067: case (byte) '<':
068: writeRaw("<");
069: break;
070: case (byte) '>':
071: writeRaw(">");
072: break;
073: case (byte) '"':
074: writeRaw(""");
075: break;
076: default:
077: put(b);
078: break;
079: }
080: }
081: }
082:
083: public void write(char cbuf[], int off, int len) throws IOException {
084: if (len == 0)
085: return;
086: if (inStartTag)
087: finishStartTag();
088: do {
089: try {
090: writeQuote(cbuf[off++]);
091: } catch (CharConversionException e) {
092: if (len-- == 0)
093: throw e;
094: writeSurrogatePair(cbuf[off - 1], cbuf[off]);
095: off++;
096: }
097: } while (--len > 0);
098: }
099:
100: public void write(char c) throws IOException {
101: if (inStartTag)
102: finishStartTag();
103: writeQuote(c);
104: }
105:
106: public void write(String str) throws IOException {
107: int len = str.length();
108: if (len == 0)
109: return;
110: if (inStartTag)
111: finishStartTag();
112: writeQuote(str, 0, len);
113: }
114:
115: public void write(String str, int off, int len) throws IOException {
116: if (len == 0)
117: return;
118: if (inStartTag)
119: finishStartTag();
120: writeQuote(str, off, len);
121: }
122:
123: private final void writeQuote(String str, int off, int len)
124: throws IOException {
125: for (; off < len; off++) {
126: try {
127: writeQuote(str.charAt(off));
128: } catch (CharConversionException e) {
129: if (++off == len)
130: throw e;
131: writeSurrogatePair(str.charAt(off - 1), str.charAt(off));
132: }
133: }
134: }
135:
136: private final void writeQuote(char c) throws IOException {
137: switch (c) {
138: case '\n':
139: writeRaw(quoteWhitespace ? " " : lineSeparator);
140: break;
141: case '&':
142: writeRaw("&");
143: break;
144: case '<':
145: writeRaw("<");
146: break;
147: case '>':
148: writeRaw(">");
149: break;
150: case '"':
151: writeRaw(""");
152: break;
153: case '\r':
154: if (quoteWhitespace
155: || !(out instanceof ReplacementTextOutputStream))
156: writeRaw(" ");
157: else
158: put((byte) '\r');
159: break;
160: case '\t':
161: if (quoteWhitespace)
162: writeRaw("	");
163: else
164: put((byte) '\t');
165: break;
166: default:
167: if (c < 0x80)
168: put((byte) c);
169: else
170: writeMB(c);
171: }
172: }
173:
174: private void writeRaw(String str) throws IOException {
175: final int n = str.length();
176: for (int i = 0; i < n; i++) {
177: char c = str.charAt(i);
178: if (c < 0x80)
179: put((byte) c);
180: else {
181: try {
182: writeMB(str.charAt(i));
183: } catch (CharConversionException e) {
184: if (++i == n)
185: throw e;
186: writeSurrogatePair(c, str.charAt(i));
187: }
188: }
189: }
190: }
191:
192: private final void writeMB(char c) throws IOException {
193: switch (c & 0xF800) {
194: case 0:
195: put((byte) (((c >> 6) & 0x1F) | 0xC0));
196: put((byte) ((c & 0x3F) | 0x80));
197: break;
198: default:
199: put((byte) (((c >> 12) & 0xF) | 0xE0));
200: put((byte) (((c >> 6) & 0x3F) | 0x80));
201: put((byte) ((c & 0x3F) | 0x80));
202: break;
203: case 0xD800:
204: throw new CharConversionException();
205: }
206: }
207:
208: private final void writeSurrogatePair(char c1, char c2)
209: throws IOException {
210: if ((c1 & 0xFC00) != 0xD800 || (c2 & 0xFC00) != 0xDC00)
211: throw new CharConversionException();
212: int c = ((c1 & 0x3FF) << 10) | (c2 & 0x3FF);
213: c += 0x10000;
214: put((byte) (((c >> 18) & 0x7) | 0xF0));
215: put((byte) (((c >> 12) & 0x3F) | 0x80));
216: put((byte) (((c >> 6) & 0x3F) | 0x80));
217: put((byte) ((c & 0x3F) | 0x80));
218: }
219:
220: public void startElement(String name) throws IOException {
221: if (inStartTag)
222: finishStartTag();
223: put((byte) '<');
224: writeRaw(name);
225: inStartTag = true;
226: }
227:
228: private void finishStartTag() throws IOException {
229: inStartTag = false;
230: put((byte) '>');
231: }
232:
233: public void attribute(String name, String value) throws IOException {
234: if (!inStartTag)
235: throw new IllegalStateException(
236: "attribute outside of start-tag");
237: put((byte) ' ');
238: writeRaw(name);
239: put((byte) '=');
240: put((byte) '"');
241: quoteWhitespace = true;
242: writeQuote(value, 0, value.length());
243: quoteWhitespace = false;
244: put((byte) '"');
245: }
246:
247: public void startAttribute(String name) throws IOException {
248: if (!inStartTag)
249: throw new IllegalStateException(
250: "attribute outside of start-tag");
251: inStartTag = false;
252: quoteWhitespace = true;
253: put((byte) ' ');
254: writeRaw(name);
255: put((byte) '=');
256: put((byte) '"');
257: }
258:
259: public void endAttribute() throws IOException {
260: put((byte) '"');
261: inStartTag = true;
262: quoteWhitespace = false;
263: }
264:
265: public void endElement(String name) throws IOException {
266: if (inStartTag) {
267: inStartTag = false;
268: if (minimizeEmptyElements) {
269: if (minimizeEmptyElementsHtml)
270: put((byte) ' ');
271: put((byte) '/');
272: put((byte) '>');
273: return;
274: }
275: put((byte) '>');
276: }
277: put((byte) '<');
278: put((byte) '/');
279: writeRaw(name);
280: put((byte) '>');
281: }
282:
283: public void processingInstruction(String target, String data)
284: throws IOException {
285: if (inStartTag)
286: finishStartTag();
287: put((byte) '<');
288: put((byte) '?');
289: writeRaw(target);
290: if (data.length() > 0) {
291: put((byte) ' ');
292: writeMarkup(data);
293: }
294: put((byte) '?');
295: put((byte) '>');
296: }
297:
298: public void comment(String body) throws IOException {
299: if (inStartTag)
300: finishStartTag();
301: writeRaw("<!--");
302: writeMarkup(body);
303: writeRaw("-->");
304: }
305:
306: public void entityReference(boolean isParam, String name)
307: throws IOException {
308: if (inStartTag)
309: finishStartTag();
310: put(isParam ? (byte) '%' : (byte) '&');
311: writeRaw(name);
312: put((byte) ';');
313: }
314:
315: public void characterReference(int n) throws IOException {
316: if (inStartTag)
317: finishStartTag();
318: writeRaw("&#");
319: writeRaw(Integer.toString(n));
320: put((byte) ';');
321: }
322:
323: public void cdataSection(String content) throws IOException {
324: if (inStartTag)
325: finishStartTag();
326: writeRaw("<![CDATA[");
327: writeMarkup(content);
328: writeRaw("]]>");
329: }
330:
331: public void markup(String str) throws IOException {
332: if (inStartTag)
333: finishStartTag();
334: writeMarkup(str);
335: }
336:
337: private static class ReplacementTextOutputStream extends
338: OutputStream {
339: private OutputStream out;
340:
341: ReplacementTextOutputStream(OutputStream out) {
342: this .out = out;
343: }
344:
345: public void write(int b) throws IOException {
346: switch (b) {
347: case '"':
348: case '\'':
349: case '%':
350: case '&':
351: case '\r':
352: out.write('&');
353: out.write('#');
354: {
355: String s = Integer.toString(b);
356: for (int i = 0; i < s.length(); i++)
357: out.write(s.charAt(i));
358: }
359: out.write(';');
360: break;
361: default:
362: out.write(b);
363: break;
364: }
365: }
366:
367: public void close() throws IOException {
368: out.close();
369: }
370:
371: public void flush() throws IOException {
372: out.flush();
373: }
374:
375: OutputStream getOutputStream() {
376: return out;
377: }
378: }
379:
380: public void startReplacementText() throws IOException {
381: flushBuf();
382: out = new ReplacementTextOutputStream(out);
383: lineSeparator = "\n";
384: }
385:
386: public void endReplacementText() throws IOException {
387: flushBuf();
388: out = ((ReplacementTextOutputStream) out).getOutputStream();
389: if (!(out instanceof ReplacementTextOutputStream))
390: lineSeparator = sysLineSeparator;
391: }
392:
393: private void writeMarkup(String str) throws IOException {
394: int len = str.length();
395: for (int i = 0; i < len; i++) {
396: char c = str.charAt(i);
397: if (c == '\n')
398: writeRaw(lineSeparator);
399: else {
400: try {
401: if (c < 0x80)
402: put((byte) c);
403: else
404: writeMB(c);
405: } catch (CharConversionException e) {
406: if (++i == len)
407: throw e;
408: writeSurrogatePair(c, str.charAt(i));
409: }
410: }
411: }
412: }
413:
414: private final void put(byte b) throws IOException {
415: if (bufUsed == buf.length)
416: flushBuf();
417: buf[bufUsed++] = b;
418: }
419:
420: private final void flushBuf() throws IOException {
421: out.write(buf, 0, bufUsed);
422: bufUsed = 0;
423: }
424:
425: public void flush() throws IOException {
426: if (bufUsed != 0)
427: flushBuf();
428: out.flush();
429: }
430:
431: public void close() throws IOException {
432: if (out != null) {
433: if (bufUsed != 0)
434: flushBuf();
435: out.close();
436: out = null;
437: buf = null;
438: }
439: }
440: }
|