001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/util/CmsXmlSaxWriter.java,v $
003: * Date : $Date: 2008-02-27 12:05:36 $
004: * Version: $Revision: 1.16 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.util;
033:
034: import org.opencms.i18n.CmsEncoder;
035: import org.opencms.main.OpenCms;
036:
037: import java.io.IOException;
038: import java.io.StringWriter;
039: import java.io.Writer;
040:
041: import org.xml.sax.Attributes;
042: import org.xml.sax.SAXException;
043: import org.xml.sax.ext.LexicalHandler;
044: import org.xml.sax.helpers.DefaultHandler;
045:
046: /**
047: * Simple SAX event handler that generates a XML (or HTML) file from the events caught.<p>
048: *
049: * This can be used for writing large XML files where keeping a DOM structure
050: * in memory might cause out-of-memory issues, like e.g. when writing the
051: * OpenCms export files.<p>
052: *
053: * It can also be used if a <code>{@link org.xml.sax.ContentHandler}</code> is needed that should
054: * generate a XML / HTML file from a series of SAX events.<p>
055: *
056: * @author Alexander Kandzior
057: *
058: * @version $Revision: 1.16 $
059: *
060: * @since 6.0.0
061: */
062: public class CmsXmlSaxWriter extends DefaultHandler implements
063: LexicalHandler {
064:
065: /** The indentation to use. */
066: private static final String INDENT_STR = "\t";
067:
068: /** The file encoding to use. */
069: private String m_encoding;
070:
071: /**
072: * Indicates if characters that are not part of the selected encoding
073: * are to be replaced with the XML <code>&#123;</code> entity representation
074: * in the generated output (not in CDATA elements).
075: */
076: private boolean m_escapeUnknownChars;
077:
078: /** Indicates if XML entities are to be encoded in the generated output (not in CDATA elements). */
079: private boolean m_escapeXml;
080:
081: /** The indentation level. */
082: private int m_indentLevel;
083:
084: /** Indicates if a CDATA node is still open. */
085: private boolean m_isCdata;
086:
087: /** The last element name written to the output. */
088: private String m_lastElementName;
089:
090: /** Indicates if a CDATA node needs to be opened. */
091: private boolean m_openCdata;
092:
093: /** Indicates if an element tag is still open. */
094: private boolean m_openElement;
095:
096: /** The Writer to write the output to. */
097: private Writer m_writer;
098:
099: /**
100: * Creates a SAX event handler that generates XML / HTML Strings from the events caught
101: * using a new <code>{@link StringWriter}</code> and the OpenCms default encoding.<p>
102: */
103: public CmsXmlSaxWriter() {
104:
105: this (new StringWriter(), OpenCms.getSystemInfo()
106: .getDefaultEncoding());
107: }
108:
109: /**
110: * Creates a SAX event handler that generates XML / HTML Strings from the events caught
111: * using a new <code>{@link StringWriter}</code> and the given encoding.<p>
112: *
113: * @param encoding the encoding for the XML file
114: */
115: public CmsXmlSaxWriter(String encoding) {
116:
117: this (new StringWriter(), encoding);
118: }
119:
120: /**
121: * Creates a SAX event handler that generates XML / HTML Strings from the events caught
122: * using a new <code>{@link StringWriter}</code> and the given encoding.<p>
123: *
124: * @param writer the Writer to write to output to
125: */
126: public CmsXmlSaxWriter(Writer writer) {
127:
128: this (writer, OpenCms.getSystemInfo().getDefaultEncoding());
129: }
130:
131: /**
132: * A SAX event handler that generates XML / HTML Strings from the events caught and writes them
133: * to the given Writer.<p>
134: *
135: * @param writer the Writer to write to output to
136: * @param encoding the encoding for the XML file
137: */
138: public CmsXmlSaxWriter(Writer writer, String encoding) {
139:
140: m_writer = writer;
141: m_encoding = encoding;
142: m_indentLevel = 0;
143: m_escapeXml = true;
144: m_escapeUnknownChars = false;
145: }
146:
147: /**
148: * @see org.xml.sax.ContentHandler#characters(char[], int, int)
149: */
150: public void characters(char[] buf, int offset, int len)
151: throws SAXException {
152:
153: if (len == 0) {
154: return;
155: }
156: if (m_openElement) {
157: write(">");
158: m_openElement = false;
159: }
160: if (m_openCdata) {
161: write("<![CDATA[");
162: m_openCdata = false;
163: }
164: if (m_escapeXml && !m_isCdata) {
165: // XML should be escaped and we are not in a CDATA node
166: String escaped = new String(buf, offset, len);
167: // escape HTML entities ('<' becomes '<')
168: escaped = CmsEncoder.escapeXml(escaped, true);
169: if (m_escapeUnknownChars) {
170: // escape all chars that can not be displayed in the selected encoding (using '{' entities)
171: escaped = CmsEncoder.adjustHtmlEncoding(escaped,
172: getEncoding());
173: }
174: write(escaped);
175: } else {
176: // no escaping or in CDATA node
177: write(new String(buf, offset, len));
178: }
179: }
180:
181: /**
182: * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
183: */
184: public void comment(char[] ch, int start, int length) {
185:
186: // ignore
187: }
188:
189: /**
190: * @see org.xml.sax.ext.LexicalHandler#endCDATA()
191: */
192: public void endCDATA() throws SAXException {
193:
194: if (!m_openCdata) {
195: write("]]>");
196: }
197: m_openCdata = false;
198: m_isCdata = false;
199: }
200:
201: /**
202: * @see org.xml.sax.ContentHandler#endDocument()
203: */
204: public void endDocument() throws SAXException {
205:
206: try {
207: if (m_openElement) {
208: write("/>");
209: m_openElement = false;
210: }
211: writeNewLine();
212: m_writer.flush();
213: } catch (IOException e) {
214: throw new SAXException(Messages.get().getBundle().key(
215: Messages.ERR_IOERROR_0), e);
216: }
217: }
218:
219: /**
220: * @see org.xml.sax.ext.LexicalHandler#endDTD()
221: */
222: public void endDTD() {
223:
224: // NOOP
225: }
226:
227: /**
228: * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
229: */
230: public void endElement(String namespaceURI, String localName,
231: String qualifiedName) throws SAXException {
232:
233: String elementName = resolveName(localName, qualifiedName);
234: if (m_openElement) {
235: write("/>");
236: } else {
237: if (!elementName.equals(m_lastElementName)) {
238: writeNewLine();
239: }
240: write("</");
241: write(elementName);
242: write(">");
243: }
244: m_openElement = false;
245: m_indentLevel--;
246: }
247:
248: /**
249: * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
250: */
251: public void endEntity(String name) {
252:
253: // NOOP
254: }
255:
256: /**
257: * Returns the encoding this XML Sax writer was initialized with.<p>
258: *
259: * @return the encoding this XML Sax writer was initialized with
260: */
261: public String getEncoding() {
262:
263: return m_encoding;
264: }
265:
266: /**
267: * Returns the Writer where the XML is written to.<p>
268: *
269: * @return the Writer where the XML is written to
270: */
271: public Writer getWriter() {
272:
273: return m_writer;
274: }
275:
276: /**
277: * Returns <code>true</code> if charactes that are not part of the selected encoding
278: * are to be replaced with the HTML <code>&#123;</code> entity representation
279: * in the generated output (not in CDATA elements).<p>
280: *
281: * @return <code>true</code> if charactes that are not part of the selected encoding
282: * are to be replaced with the HTML entity representation
283: */
284: public boolean isEscapeUnknownChars() {
285:
286: return m_escapeUnknownChars;
287: }
288:
289: /**
290: * Returns <code>true</code> if XML entities are to be encoded in the generated output (not in CDATA elements).<p>
291: *
292: * @return <code>true</code> if XML entities are to be encoded in the generated output (not in CDATA elements)
293: */
294: public boolean isEscapeXml() {
295:
296: return m_escapeXml;
297: }
298:
299: /**
300: * Sets the encoding to use for the generated output.<p>
301: *
302: * @param value the encoding to use for the generated output
303: */
304: public void setEncoding(String value) {
305:
306: m_encoding = value;
307: }
308:
309: /**
310: * If set to <code>true</code>, then charactes that are not part of the selected encoding
311: * are to be replaced with the XML <code>&#123;</code> entity representation
312: * in the generated output (not in CDATA elements).<p>
313: *
314: * @param value indicates to escape unknown characters with XML entities or not
315: */
316: public void setEscapeUnknownChars(boolean value) {
317:
318: m_escapeUnknownChars = value;
319: }
320:
321: /**
322: * If set to <code>true</code>, then
323: * XML entities are to be encoded in the generated output (not in CDATA elements).<p>
324: *
325: * @param value indicates to to escape characters with XML entities or not
326: */
327: public void setEscapeXml(boolean value) {
328:
329: m_escapeXml = value;
330: }
331:
332: /**
333: * @see org.xml.sax.ext.LexicalHandler#startCDATA()
334: */
335: public void startCDATA() {
336:
337: m_openCdata = true;
338: m_isCdata = true;
339: }
340:
341: /**
342: * @see org.xml.sax.ContentHandler#startDocument()
343: */
344: public void startDocument() throws SAXException {
345:
346: write("<?xml version=\"1.0\" encoding=\"");
347: write(m_encoding);
348: write("\"?>");
349: writeNewLine();
350: }
351:
352: /**
353: * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
354: */
355: public void startDTD(String name, String publicId, String systemId)
356: throws SAXException {
357:
358: write("<!DOCTYPE ");
359: write(name);
360: if (publicId != null) {
361: write(" PUBLIC \"");
362: write(publicId);
363: write("\"");
364: }
365: if (systemId != null) {
366: write(" SYSTEM \"");
367: write(systemId);
368: write("\"");
369: }
370: write(">");
371: writeNewLine();
372: }
373:
374: /**
375: * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
376: */
377: public void startElement(String namespaceURI, String localName,
378: String qualifiedName, Attributes attributes)
379: throws SAXException {
380:
381: if (m_openElement) {
382: write(">");
383: m_openElement = false;
384: }
385: // increase indent and write linebreak
386: m_indentLevel++;
387: writeNewLine();
388: // get element name and write entry
389: m_lastElementName = resolveName(localName, qualifiedName);
390: write("<");
391: write(m_lastElementName);
392: if (attributes != null) {
393: for (int i = 0; i < attributes.getLength(); i++) {
394: write(" ");
395: write(resolveName(attributes.getLocalName(i),
396: attributes.getQName(i)));
397: write("=\"");
398: write(attributes.getValue(i));
399: write("\"");
400: }
401: }
402: m_openElement = true;
403: }
404:
405: /**
406: * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
407: */
408: public void startEntity(String name) {
409:
410: // ignore
411: }
412:
413: /**
414: * Resolves the local vs. the qualified name.<p>
415: *
416: * If the local name is the empty String "", the qualified name is used.<p>
417: *
418: * @param localName the local name
419: * @param qualifiedName the qualified XML 1.0 name
420: * @return the resolved name to use
421: */
422: private String resolveName(String localName, String qualifiedName) {
423:
424: if ((localName == null) || (localName.length() == 0)) {
425: return qualifiedName;
426: } else {
427: return localName;
428: }
429: }
430:
431: /**
432: * Writes s String to the output stream.<p>
433: *
434: * @param s the String to write
435: * @throws SAXException in case of I/O errors
436: */
437: private void write(String s) throws SAXException {
438:
439: try {
440: m_writer.write(s);
441: } catch (IOException e) {
442: throw new SAXException(Messages.get().getBundle().key(
443: Messages.ERR_IOERROR_0), e);
444: }
445: }
446:
447: /**
448: * Writes a linebreak to the output stream, also handles the indentation.<p>
449: *
450: * @throws SAXException in case of I/O errors
451: */
452: private void writeNewLine() throws SAXException {
453:
454: try {
455: // write new line
456: m_writer.write("\r\n");
457: // write indentation
458: for (int i = 1; i < m_indentLevel; i++) {
459: m_writer.write(INDENT_STR);
460: }
461: // flush the stream
462: m_writer.flush();
463: } catch (IOException e) {
464: throw new SAXException(Messages.get().getBundle().key(
465: Messages.ERR_IOERROR_0), e);
466: }
467: }
468: }
|