001: package uk.org.ponder.xml;
002:
003: import java.io.IOException;
004: import java.io.OutputStream;
005: import java.io.Writer;
006:
007: import uk.org.ponder.streamutil.write.OutputStreamPOS;
008: import uk.org.ponder.streamutil.write.PrintOutputStream;
009: import uk.org.ponder.streamutil.write.WriterPOS;
010: import uk.org.ponder.stringutil.CharWrap;
011: import uk.org.ponder.util.Logger;
012:
013: /**
014: * A utility class to write XML data either raw or with XML/HTML entity
015: * escaping. An XMLWriter wraps either an OutputStream or a Writer, and supplies
016: * methods with similar names to <code>Writer</code> which escape all
017: * mandatory XML/HTML entities, and methods named with suffix <code>Raw</code>
018: * which write the data without transformation.
019: */
020:
021: public class XMLWriter {
022: /**
023: * The number of characters to indent by for each nesting level of a tag to be
024: * written. Some <code>writeRaw</code> methods accept a nesting level used
025: * as a multiplier for this factor.
026: */
027: public static final int INDENT_WIDTH = 2;
028: /**
029: * The default encoding to be used when writing byte streams - currently the
030: * UTF-8 encoding
031: */
032: public static final String DEFAULT_ENCODING = "UTF-8";
033: private static String DEFAULT_DECLARATION = "<?xml version=\"1.0\" ?>\n";
034: private PrintOutputStream internalwriter;
035:
036: public PrintOutputStream getInternalWriter() {
037: return internalwriter;
038: }
039:
040: /**
041: * Creates an XMLWriter wrapping the supplied OutputStream. Character data is
042: * converted using the default encoding scheme above
043: *
044: * @param os
045: * The output stream to be wrapped.
046: */
047:
048: public XMLWriter(OutputStream os) {
049: internalwriter = new OutputStreamPOS(os, DEFAULT_ENCODING);
050: }
051:
052: /**
053: * Creates an XMLWriter wrapping the supplied Writer.
054: *
055: * @param internalwriter
056: * The writer to be wrapped.
057: */
058:
059: public XMLWriter(Writer internalwriter) {
060: this .internalwriter = new WriterPOS(internalwriter);
061: }
062:
063: public XMLWriter(PrintOutputStream pos) {
064: this .internalwriter = pos;
065: }
066:
067: /**
068: * Writes the supplied data to the wrapped stream without conversion.
069: *
070: * @param towrite
071: * A character array holding the data to be written.
072: * @param start
073: * The offset of the data to be written within the array.
074: * @param length
075: * The length of the data to be written.
076: * @exception IOException
077: * If an I/O error occurs while writing the data.
078: */
079:
080: public void writeRaw(char[] towrite, int start, int length) {
081: internalwriter.write(towrite, start, length);
082: }
083:
084: /**
085: * Writes the supplied string to the wrapped stream without conversion.
086: *
087: * @param tag
088: * The string to be written.
089: * @exception IOException
090: * If an I/O error occurs while writing the string.
091: */
092:
093: public XMLWriter writeRaw(String tag) {
094: internalwriter.print(tag);
095: return this ;
096: }
097:
098: public static void indent(int nestinglevel, PrintOutputStream writer) {
099: for (int i = 0; i < nestinglevel * INDENT_WIDTH; ++i) {
100: writer.print(" ");
101: }
102: }
103:
104: // write with specified indenting and without deentitising
105: /**
106: * Writes the supplied string to the wrapped stream with the specified indent
107: * level.
108: *
109: * @param tag
110: * The string to be written.
111: * @param nestinglevel
112: * The multiplier for the <code>INDENT_WIDTH</code>, giving the
113: * number of spaces to be written before the supplied string.
114: * @exception IOException
115: * If an I/O error occurs while writing the string.
116: */
117: public void writeRaw(String tag, int nestinglevel) {
118: indent(nestinglevel, internalwriter);
119: internalwriter.print(tag);
120: // Logger.println(tag, Logger.DEBUG_SUBATOMIC);
121: }
122:
123: public void closeTag(String tag, int nestinglevel,
124: boolean writtenchildren) {
125: if (writtenchildren) {
126: indent(nestinglevel, internalwriter);
127: internalwriter.print("</");
128: internalwriter.print(tag);
129: internalwriter.print(">");
130: } else {
131: internalwriter.print("/>");
132: }
133: if (nestinglevel >= 0) {
134: internalwriter.print("\n");
135: }
136: }
137:
138: /**
139: * Returns the default declaration that will be written by the
140: * <code>writeDeclaration</code> method.
141: *
142: * @return The required default declaration.
143: */
144: public static String getDefaultDeclaration() {
145: return DEFAULT_DECLARATION;
146: }
147:
148: /**
149: * Writes a default declaration to the wrapped stream.
150: *
151: * @exception IOException
152: * If an I/O error occurs while writing the declaration.
153: */
154:
155: public void writeDeclaration() {
156: internalwriter.print(DEFAULT_DECLARATION);
157: }
158:
159: public static String[] entitytable;
160:
161: static {
162: entitytable = new String['>' + 1];
163: entitytable['&'] = "&";
164: entitytable['<'] = "<";
165: entitytable['>'] = ">";
166: entitytable['"'] = """;
167: // HTML 4.0 does not define ' and does not plan to
168: entitytable['\''] = "'";
169: }
170:
171: /**
172: * Writes the supplied data to the wrapped stream, escaping all mandatory
173: * XML entities, being &, <, >, ".
174: * NB apostrophe is no longer encoded, since this seems to give a measurable
175: * Increase in speed. (' is
176: * escaped to &#39; since HTML 4.0 does not define the &apos; entity
177: * and does not plan to)
178: *
179: * @param towrite
180: * A character array holding the data to be written.
181: * @param start
182: * The offset of the data to be written within the array.
183: * @param length
184: * The length of the data to be written.
185: */
186:
187: // This odd strategy is based on the observation that MOST attributes/XML
188: // data do NOT contain any of the entity characters, but those that do
189: // are likely to contain more than one. This could no doubt be tuned
190: // even further but there is only a maximum of 5% slack left in typical
191: // page rendering -
192: // original timing: 690µs
193: // timing with strategy: 680µs
194: // timing with strategy - apos: 658µs
195: // timing with unencoded write: 650µs
196: // timing with write as no-op: 630µs
197: public final void write(char[] towrite, int start, int length) {
198: int limit = start + length;
199: // String ent = null;
200: //while (length > 0) {
201: for (; length > 0; --length) {
202: char c = towrite[limit - length];
203: if (c == '&' || c == '<' || c == '>' || c == '"')
204: break;
205: //on JDK 1.5, amazingly this line puts it back up to 670 with the 4 cases.
206: //if ((c & 35) != 32) continue;
207: // switch (c) {
208: //
209: // case '&':
210: // // ent = "&";
211: // // break outer;
212: // case '<':
213: // // ent = "<";
214: // // break outer;
215: // case '>':
216: // // ent = ">";
217: // // break outer;
218: // case '"':
219: // // ent = """;
220: // // break outer;
221: // case '\'':
222: // // ent = "'";
223: // break outer;
224: // }
225: }
226: internalwriter.write(towrite, start, limit - start - length);
227: // if (ent != null) {
228: // internalwriter.print(ent);
229: // --length;
230: // }
231: // }
232: if (length > 0) {
233: // writeEntity(towrite[limit - length], internalwriter);
234: // --length;
235: writeSlow(towrite, start + limit - length, length);
236: }
237: //}
238: }
239:
240: public static final void writeEntity(char c, PrintOutputStream pos) {
241: switch (c) {
242: case '&':
243: pos.print("&");
244: return;
245: case '<':
246: pos.print("<");
247: return;
248: case '>':
249: pos.print(">");
250: return;
251: case '"':
252: pos.print(""");
253: return;
254: case '\'':
255: pos.print("'");
256: return;
257: }
258: return;
259: }
260:
261: public final void writeSlow(char[] towrite, int start, int length) {
262: // AMAZINGLY, in 1.5 it is quicker to create this here than economise it.
263: CharWrap svb = new CharWrap(length + 10);
264: int limit = start + length;
265: for (int i = length; i > 0; --i) {
266: char c = towrite[limit - i];
267: switch (c) {
268: case '&':
269: svb.append("&");
270: svb.ensureCapacity(svb.size + i);
271: break;
272: case '<':
273: svb.append("<");
274: svb.ensureCapacity(svb.size + i);
275: break;
276: case '>':
277: svb.append(">");
278: svb.ensureCapacity(svb.size + i);
279: break;
280: case '"':
281: svb.append(""");
282: svb.ensureCapacity(svb.size + i);
283: break;
284:
285: // HTML 4.0 does not define ' and does not plan to
286: case '\'':
287: svb.append("'");
288: svb.ensureCapacity(svb.size + i);
289: break;
290: default:
291: svb.appendFast(c);
292: }
293: // String lookup = c > entitytable.length? null : entitytable[c];
294: // // optimised on the basis that entitising is RARE - we only check
295: // // available capacity at that point.
296: // if (lookup == null) {
297: // svb.appendFast(c);
298: // }
299: // else {
300: // svb.append(lookup);
301: // svb.ensureCapacity(svb.size + (limit - i));
302: // }
303: }
304: internalwriter.write(svb.storage, svb.offset, svb.size);
305: }
306:
307: /**
308: * Writes the supplied data to the wrapped stream, escaping all mandatory
309: * XML/HTML entities, being &, <, >, " and '. ' is
310: * escaped to &#39; since HTML 4.0 does not define the &apos; entity
311: * and does not plan to.
312: *
313: * @param towrite
314: * The string to be written.
315: * @exception IOException
316: * If an I/O error occurs while writing the string.
317: */
318:
319: public void write(String towrite) {
320: char[] array = (towrite == null ? "null" : towrite)
321: .toCharArray();
322: write(array, 0, array.length);
323: }
324:
325: /**
326: * Flushes the wrapped stream.
327: *
328: * @exception IOException
329: * If an I/O error occurs while flushing the stream.
330: */
331:
332: public void flush() {
333: internalwriter.flush();
334: }
335:
336: /**
337: * Closes this XMLWriter object, in effect flushing it and making it unusable
338: * for any further write operations.
339: * <p>
340: * Closing this does not close the underlying input stream!
341: *
342: * @exception IOException
343: * If an I/O error occurs while closing the stream.
344: */
345: public void close() {
346: if (internalwriter != null) {
347: try {
348: flush();
349: } catch (Throwable t) {
350: Logger.println(
351: "Unhandled exception closing XML Writer: " + t,
352: Logger.DEBUG_SEVERE);
353: // internalwriter.close();
354: internalwriter = null;
355: }
356: }
357: }
358: }
|