001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: // Sep 14, 2000:
019: // Fixed serializer to report IO exceptions directly, instead of at
020: // the end of document processing.
021: // Reported by Patrick Higgins <phiggins@transzap.com>
022:
023: package org.apache.xml.serialize;
024:
025: import java.io.IOException;
026:
027: import org.w3c.dom.Element;
028: import org.w3c.dom.Node;
029: import org.xml.sax.AttributeList;
030: import org.xml.sax.Attributes;
031: import org.xml.sax.SAXException;
032:
033: /**
034: * Implements a text serializer supporting both DOM and SAX
035: * serializing. For usage instructions see {@link Serializer}.
036: * <p>
037: * If an output stream is used, the encoding is taken from the
038: * output format (defaults to <tt>UTF-8</tt>). If a writer is
039: * used, make sure the writer uses the same encoding (if applies)
040: * as specified in the output format.
041: * <p>
042: * The serializer supports both DOM and SAX. DOM serializing is done
043: * by calling {@link #serialize} and SAX serializing is done by firing
044: * SAX events and using the serializer as a document handler.
045: * <p>
046: * If an I/O exception occurs while handling a SAX event, the
047: * serializer reports it directly: the event method throws a
048: * {@link SAXException} wrapping the {@link IOException} instead of
049: * deferring it to the end of the document.
050: *
051: * @deprecated This class was deprecated in Xerces 2.9.0. It is recommended
052: * that new applications use the DOM Level 3 LSSerializer or JAXP's Transformation
053: * API for XML (TrAX) for serializing XML and HTML. See the Xerces documentation for more
054: * information.
055: * @version $Revision: 476047 $ $Date: 2006-11-16 23:27:45 -0500 (Thu, 16 Nov 2006) $
056: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
057: * @see Serializer
058: */
059: public class TextSerializer extends BaseMarkupSerializer {
060:
061: /**
062: * Constructs a new serializer. The serializer cannot be used without
063: * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
064: * first.
065: */
066: public TextSerializer() {
067: super (new OutputFormat(Method.TEXT, null, false));
068: }
069:
070: public void setOutputFormat(OutputFormat format) {
071: super .setOutputFormat(format != null ? format
072: : new OutputFormat(Method.TEXT, null, false));
073: }
074:
075: //-----------------------------------------//
076: // SAX content handler serializing methods //
077: //-----------------------------------------//
078:
079: public void startElement(String namespaceURI, String localName,
080: String rawName, Attributes attrs) throws SAXException {
081: startElement(rawName == null ? localName : rawName, null);
082: }
083:
084: public void endElement(String namespaceURI, String localName,
085: String rawName) throws SAXException {
086: endElement(rawName == null ? localName : rawName);
087: }
088:
089: //------------------------------------------//
090: // SAX document handler serializing methods //
091: //------------------------------000---------//
092:
093: public void startElement(String tagName, AttributeList attrs)
094: throws SAXException {
095: boolean preserveSpace;
096: ElementState state;
097:
098: try {
099: state = getElementState();
100: if (isDocumentState()) {
101: // If this is the root element handle it differently.
102: // If the first root element in the document, serialize
103: // the document's DOCTYPE. Space preserving defaults
104: // to that of the output format.
105: if (!_started)
106: startDocument(tagName);
107: }
108: // For any other element, if first in parent, then
109: // use the parnet's space preserving.
110: preserveSpace = state.preserveSpace;
111:
112: // Do not change the current element state yet.
113: // This only happens in endElement().
114:
115: // Ignore all other attributes of the element, only printing
116: // its contents.
117:
118: // Now it's time to enter a new element state
119: // with the tag name and space preserving.
120: // We still do not change the curent element state.
121: state = enterElementState(null, null, tagName,
122: preserveSpace);
123: } catch (IOException except) {
124: throw new SAXException(except);
125: }
126: }
127:
128: public void endElement(String tagName) throws SAXException {
129: try {
130: endElementIO(tagName);
131: } catch (IOException except) {
132: throw new SAXException(except);
133: }
134: }
135:
136: public void endElementIO(String tagName) throws IOException {
137: ElementState state;
138:
139: // Works much like content() with additions for closing
140: // an element. Note the different checks for the closed
141: // element's state and the parent element's state.
142: state = getElementState();
143: // Leave the element state and update that of the parent
144: // (if we're not root) to not empty and after element.
145: state = leaveElementState();
146: state.afterElement = true;
147: state.empty = false;
148: if (isDocumentState())
149: _printer.flush();
150: }
151:
152: public void processingInstructionIO(String target, String code)
153: throws IOException {
154: }
155:
156: public void comment(String text) {
157: }
158:
159: public void comment(char[] chars, int start, int length) {
160: }
161:
162: public void characters(char[] chars, int start, int length)
163: throws SAXException {
164: ElementState state;
165:
166: try {
167: state = content();
168: state.doCData = state.inCData = false;
169: printText(chars, start, length, true, true);
170: } catch (IOException except) {
171: throw new SAXException(except);
172: }
173: }
174:
175: protected void characters(String text, boolean unescaped)
176: throws IOException {
177: ElementState state;
178:
179: state = content();
180: state.doCData = state.inCData = false;
181: printText(text, true, true);
182: }
183:
184: //------------------------------------------//
185: // Generic node serializing methods methods //
186: //------------------------------------------//
187:
188: /**
189: * Called to serialize the document's DOCTYPE by the root element.
190: * <p>
191: * This method will check if it has not been called before ({@link #_started}),
192: * will serialize the document type declaration, and will serialize all
193: * pre-root comments and PIs that were accumulated in the document
194: * (see {@link #serializePreRoot}). Pre-root will be serialized even if
195: * this is not the first root element of the document.
196: */
197: protected void startDocument(String rootTagName) throws IOException {
198: // Required to stop processing the DTD, even though the DTD
199: // is not printed.
200: _printer.leaveDTD();
201:
202: _started = true;
203: // Always serialize these, even if not te first root element.
204: serializePreRoot();
205: }
206:
207: /**
208: * Called to serialize a DOM element. Equivalent to calling {@link
209: * #startElement}, {@link #endElement} and serializing everything
210: * inbetween, but better optimized.
211: */
212: protected void serializeElement(Element elem) throws IOException {
213: Node child;
214: ElementState state;
215: boolean preserveSpace;
216: String tagName;
217:
218: tagName = elem.getTagName();
219: state = getElementState();
220: if (isDocumentState()) {
221: // If this is the root element handle it differently.
222: // If the first root element in the document, serialize
223: // the document's DOCTYPE. Space preserving defaults
224: // to that of the output format.
225: if (!_started)
226: startDocument(tagName);
227: }
228: // For any other element, if first in parent, then
229: // use the parnet's space preserving.
230: preserveSpace = state.preserveSpace;
231:
232: // Do not change the current element state yet.
233: // This only happens in endElement().
234:
235: // Ignore all other attributes of the element, only printing
236: // its contents.
237:
238: // If element has children, then serialize them, otherwise
239: // serialize en empty tag.
240: if (elem.hasChildNodes()) {
241: // Enter an element state, and serialize the children
242: // one by one. Finally, end the element.
243: state = enterElementState(null, null, tagName,
244: preserveSpace);
245: child = elem.getFirstChild();
246: while (child != null) {
247: serializeNode(child);
248: child = child.getNextSibling();
249: }
250: endElementIO(tagName);
251: } else {
252: if (!isDocumentState()) {
253: // After element but parent element is no longer empty.
254: state.afterElement = true;
255: state.empty = false;
256: }
257: }
258: }
259:
260: /**
261: * Serialize the DOM node. This method is unique to the Text serializer.
262: *
263: * @param node The node to serialize
264: */
265: protected void serializeNode(Node node) throws IOException {
266: // Based on the node type call the suitable SAX handler.
267: // Only comments entities and documents which are not
268: // handled by SAX are serialized directly.
269: switch (node.getNodeType()) {
270: case Node.TEXT_NODE: {
271: String text;
272:
273: text = node.getNodeValue();
274: if (text != null)
275: characters(node.getNodeValue(), true);
276: break;
277: }
278:
279: case Node.CDATA_SECTION_NODE: {
280: String text;
281:
282: text = node.getNodeValue();
283: if (text != null)
284: characters(node.getNodeValue(), true);
285: break;
286: }
287:
288: case Node.COMMENT_NODE:
289: break;
290:
291: case Node.ENTITY_REFERENCE_NODE:
292: // Ignore.
293: break;
294:
295: case Node.PROCESSING_INSTRUCTION_NODE:
296: break;
297:
298: case Node.ELEMENT_NODE:
299: serializeElement((Element) node);
300: break;
301:
302: case Node.DOCUMENT_NODE:
303: // !!! Fall through
304: case Node.DOCUMENT_FRAGMENT_NODE: {
305: Node child;
306:
307: // By definition this will happen if the node is a document,
308: // document fragment, etc. Just serialize its contents. It will
309: // work well for other nodes that we do not know how to serialize.
310: child = node.getFirstChild();
311: while (child != null) {
312: serializeNode(child);
313: child = child.getNextSibling();
314: }
315: break;
316: }
317:
318: default:
319: break;
320: }
321: }
322:
323: protected ElementState content() {
324: ElementState state;
325:
326: state = getElementState();
327: if (!isDocumentState()) {
328: // If this is the first content in the element,
329: // change the state to not-empty.
330: if (state.empty)
331: state.empty = false;
332: // Except for one content type, all of them
333: // are not last element. That one content
334: // type will take care of itself.
335: state.afterElement = false;
336: }
337: return state;
338: }
339:
340: protected String getEntityRef(int ch) {
341: return null;
342: }
343:
344: }
|