001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: // Sep 14, 2000:
// Fixed serializer to report IO exceptions directly, instead of at
// the end of document processing.
061: // Reported by Patrick Higgins <phiggins@transzap.com>
062:
063: package org.apache.xml.serialize;
064:
065: import java.io.IOException;
066: import java.io.UnsupportedEncodingException;
067: import java.io.OutputStream;
068: import java.io.Writer;
069:
070: import org.w3c.dom.*;
071: import org.xml.sax.DocumentHandler;
072: import org.xml.sax.ContentHandler;
073: import org.xml.sax.AttributeList;
074: import org.xml.sax.Attributes;
075: import org.xml.sax.SAXException;
076:
077: /**
078: * Implements a text serializer supporting both DOM and SAX
079: * serializing. For usage instructions see {@link Serializer}.
080: * <p>
081: * If an output stream is used, the encoding is taken from the
082: * output format (defaults to <tt>UTF-8</tt>). If a writer is
083: * used, make sure the writer uses the same encoding (if applies)
084: * as specified in the output format.
085: * <p>
086: * The serializer supports both DOM and SAX. DOM serializing is done
087: * by calling {@link #serialize} and SAX serializing is done by firing
088: * SAX events and using the serializer as a document handler.
089: * <p>
090: * If an I/O exception occurs while serializing, the serializer
091: * will not throw an exception directly, but only throw it
092: * at the end of serializing (either DOM or SAX's {@link
093: * org.xml.sax.DocumentHandler#endDocument}.
094: *
095: *
096: * @version $Revision: 1.11 $ $Date: 2001/07/20 20:37:06 $
097: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
098: * @see Serializer
099: */
100: public class TextSerializer extends BaseMarkupSerializer {
101:
102: /**
103: * Constructs a new serializer. The serializer cannot be used without
104: * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
105: * first.
106: */
107: public TextSerializer() {
108: super (new OutputFormat(Method.TEXT, null, false));
109: }
110:
111: public void setOutputFormat(OutputFormat format) {
112: super .setOutputFormat(format != null ? format
113: : new OutputFormat(Method.TEXT, null, false));
114: }
115:
116: //-----------------------------------------//
117: // SAX content handler serializing methods //
118: //-----------------------------------------//
119:
120: public void startElement(String namespaceURI, String localName,
121: String rawName, Attributes attrs) throws SAXException {
122: startElement(rawName == null ? localName : rawName, null);
123: }
124:
125: public void endElement(String namespaceURI, String localName,
126: String rawName) throws SAXException {
127: endElement(rawName == null ? localName : rawName);
128: }
129:
130: //------------------------------------------//
131: // SAX document handler serializing methods //
132: //------------------------------000---------//
133:
134: public void startElement(String tagName, AttributeList attrs)
135: throws SAXException {
136: boolean preserveSpace;
137: ElementState state;
138:
139: try {
140: state = getElementState();
141: if (isDocumentState()) {
142: // If this is the root element handle it differently.
143: // If the first root element in the document, serialize
144: // the document's DOCTYPE. Space preserving defaults
145: // to that of the output format.
146: if (!_started)
147: startDocument(tagName);
148: }
149: // For any other element, if first in parent, then
150: // use the parnet's space preserving.
151: preserveSpace = state.preserveSpace;
152:
153: // Do not change the current element state yet.
154: // This only happens in endElement().
155:
156: // Ignore all other attributes of the element, only printing
157: // its contents.
158:
159: // Now it's time to enter a new element state
160: // with the tag name and space preserving.
161: // We still do not change the curent element state.
162: state = enterElementState(null, null, tagName,
163: preserveSpace);
164: } catch (IOException except) {
165: throw new SAXException(except);
166: }
167: }
168:
169: public void endElement(String tagName) throws SAXException {
170: try {
171: endElementIO(tagName);
172: } catch (IOException except) {
173: throw new SAXException(except);
174: }
175: }
176:
177: public void endElementIO(String tagName) throws IOException {
178: ElementState state;
179:
180: // Works much like content() with additions for closing
181: // an element. Note the different checks for the closed
182: // element's state and the parent element's state.
183: state = getElementState();
184: // Leave the element state and update that of the parent
185: // (if we're not root) to not empty and after element.
186: state = leaveElementState();
187: state.afterElement = true;
188: state.empty = false;
189: if (isDocumentState())
190: _printer.flush();
191: }
192:
193: public void processingInstructionIO(String target, String code)
194: throws IOException {
195: }
196:
197: public void comment(String text) {
198: }
199:
200: public void comment(char[] chars, int start, int length) {
201: }
202:
203: public void characters(char[] chars, int start, int length)
204: throws SAXException {
205: ElementState state;
206:
207: try {
208: state = content();
209: state.doCData = state.inCData = false;
210: printText(chars, start, length, true, true);
211: } catch (IOException except) {
212: throw new SAXException(except);
213: }
214: }
215:
216: protected void characters(String text, boolean unescaped)
217: throws IOException {
218: ElementState state;
219:
220: state = content();
221: state.doCData = state.inCData = false;
222: printText(text, true, true);
223: }
224:
225: //------------------------------------------//
226: // Generic node serializing methods methods //
227: //------------------------------------------//
228:
229: /**
230: * Called to serialize the document's DOCTYPE by the root element.
231: * <p>
232: * This method will check if it has not been called before ({@link #_started}),
233: * will serialize the document type declaration, and will serialize all
234: * pre-root comments and PIs that were accumulated in the document
235: * (see {@link #serializePreRoot}). Pre-root will be serialized even if
236: * this is not the first root element of the document.
237: */
238: protected void startDocument(String rootTagName) throws IOException {
239: // Required to stop processing the DTD, even though the DTD
240: // is not printed.
241: _printer.leaveDTD();
242:
243: _started = true;
244: // Always serialize these, even if not te first root element.
245: serializePreRoot();
246: }
247:
248: /**
249: * Called to serialize a DOM element. Equivalent to calling {@link
250: * #startElement}, {@link #endElement} and serializing everything
251: * inbetween, but better optimized.
252: */
253: protected void serializeElement(Element elem) throws IOException {
254: Node child;
255: ElementState state;
256: boolean preserveSpace;
257: String tagName;
258:
259: tagName = elem.getTagName();
260: state = getElementState();
261: if (isDocumentState()) {
262: // If this is the root element handle it differently.
263: // If the first root element in the document, serialize
264: // the document's DOCTYPE. Space preserving defaults
265: // to that of the output format.
266: if (!_started)
267: startDocument(tagName);
268: }
269: // For any other element, if first in parent, then
270: // use the parnet's space preserving.
271: preserveSpace = state.preserveSpace;
272:
273: // Do not change the current element state yet.
274: // This only happens in endElement().
275:
276: // Ignore all other attributes of the element, only printing
277: // its contents.
278:
279: // If element has children, then serialize them, otherwise
280: // serialize en empty tag.
281: if (elem.hasChildNodes()) {
282: // Enter an element state, and serialize the children
283: // one by one. Finally, end the element.
284: state = enterElementState(null, null, tagName,
285: preserveSpace);
286: child = elem.getFirstChild();
287: while (child != null) {
288: serializeNode(child);
289: child = child.getNextSibling();
290: }
291: endElementIO(tagName);
292: } else {
293: if (!isDocumentState()) {
294: // After element but parent element is no longer empty.
295: state.afterElement = true;
296: state.empty = false;
297: }
298: }
299: }
300:
301: /**
302: * Serialize the DOM node. This method is unique to the Text serializer.
303: *
304: * @param node The node to serialize
305: */
306: protected void serializeNode(Node node) throws IOException {
307: // Based on the node type call the suitable SAX handler.
308: // Only comments entities and documents which are not
309: // handled by SAX are serialized directly.
310: switch (node.getNodeType()) {
311: case Node.TEXT_NODE: {
312: String text;
313:
314: text = node.getNodeValue();
315: if (text != null)
316: characters(node.getNodeValue(), true);
317: break;
318: }
319:
320: case Node.CDATA_SECTION_NODE: {
321: String text;
322:
323: text = node.getNodeValue();
324: if (text != null)
325: characters(node.getNodeValue(), true);
326: break;
327: }
328:
329: case Node.COMMENT_NODE:
330: break;
331:
332: case Node.ENTITY_REFERENCE_NODE:
333: // Ignore.
334: break;
335:
336: case Node.PROCESSING_INSTRUCTION_NODE:
337: break;
338:
339: case Node.ELEMENT_NODE:
340: serializeElement((Element) node);
341: break;
342:
343: case Node.DOCUMENT_NODE:
344: // !!! Fall through
345: case Node.DOCUMENT_FRAGMENT_NODE: {
346: Node child;
347:
348: // By definition this will happen if the node is a document,
349: // document fragment, etc. Just serialize its contents. It will
350: // work well for other nodes that we do not know how to serialize.
351: child = node.getFirstChild();
352: while (child != null) {
353: serializeNode(child);
354: child = child.getNextSibling();
355: }
356: break;
357: }
358:
359: default:
360: break;
361: }
362: }
363:
364: protected ElementState content() {
365: ElementState state;
366:
367: state = getElementState();
368: if (!isDocumentState()) {
369: // If this is the first content in the element,
370: // change the state to not-empty.
371: if (state.empty)
372: state.empty = false;
373: // Except for one content type, all of them
374: // are not last element. That one content
375: // type will take care of itself.
376: state.afterElement = false;
377: }
378: return state;
379: }
380:
381: protected String getEntityRef(int ch) {
382: return null;
383: }
384:
385: }
|