001: /*
002: * Copyright 1999-2002,2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: // Sep 14, 2000:
018: // Fixed serializer to report IO exceptions directly, instead of at
019: // the end of document processing.
020: // Reported by Patrick Higgins <phiggins@transzap.com>
021:
022: package org.jasig.portal.serialize;
023:
024: import java.io.IOException;
025:
026: import org.w3c.dom.Element;
027: import org.w3c.dom.Node;
028: import org.xml.sax.AttributeList;
029: import org.xml.sax.Attributes;
030: import org.xml.sax.SAXException;
031:
032: /**
033: * Implements a text serializer supporting both DOM and SAX
034: * serializing. For usage instructions see {@link Serializer}.
035: * <p>
036: * If an output stream is used, the encoding is taken from the
037: * output format (defaults to <tt>UTF-8</tt>). If a writer is
038: * used, make sure the writer uses the same encoding (if applies)
039: * as specified in the output format.
040: * <p>
041: * The serializer supports both DOM and SAX. DOM serializing is done
042: * by calling {@link #serialize} and SAX serializing is done by firing
043: * SAX events and using the serializer as a document handler.
044: * <p>
045: * If an I/O exception occurs while serializing, the serializer
046: * will not throw an exception directly, but only throw it
047: * at the end of serializing (either DOM or SAX's {@link
048: * org.xml.sax.DocumentHandler#endDocument}.
049: *
050: *
051: * @version $Revision: 36559 $ $Date: 2006-04-28 11:38:13 -0700 (Fri, 28 Apr 2006) $
052: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
053: * @see Serializer
054: */
055: public class TextSerializer extends BaseMarkupSerializer {
056:
057: /**
058: * Constructs a new serializer. The serializer cannot be used without
059: * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
060: * first.
061: */
062: public TextSerializer() {
063: super (new OutputFormat(Method.TEXT, null, false));
064: }
065:
066: public void setOutputFormat(OutputFormat format) {
067: super .setOutputFormat(format != null ? format
068: : new OutputFormat(Method.TEXT, null, false));
069: }
070:
071: //-----------------------------------------//
072: // SAX content handler serializing methods //
073: //-----------------------------------------//
074:
075: public void startElement(String namespaceURI, String localName,
076: String rawName, Attributes attrs) throws SAXException {
077: startElement(rawName == null ? localName : rawName, null);
078: }
079:
080: public void endElement(String namespaceURI, String localName,
081: String rawName) throws SAXException {
082: endElement(rawName == null ? localName : rawName);
083: }
084:
085: //------------------------------------------//
086: // SAX document handler serializing methods //
087: //------------------------------000---------//
088:
089: public void startElement(String tagName, AttributeList attrs)
090: throws SAXException {
091: boolean preserveSpace;
092: ElementState state;
093:
094: try {
095: state = getElementState();
096: if (isDocumentState()) {
097: // If this is the root element handle it differently.
098: // If the first root element in the document, serialize
099: // the document's DOCTYPE. Space preserving defaults
100: // to that of the output format.
101: if (!_started)
102: startDocument(tagName);
103: }
104: // For any other element, if first in parent, then
105: // use the parnet's space preserving.
106: preserveSpace = state.preserveSpace;
107:
108: // Do not change the current element state yet.
109: // This only happens in endElement().
110:
111: // Ignore all other attributes of the element, only printing
112: // its contents.
113:
114: // Now it's time to enter a new element state
115: // with the tag name and space preserving.
116: // We still do not change the curent element state.
117: state = enterElementState(null, null, tagName,
118: preserveSpace);
119: } catch (IOException except) {
120: throw new SAXException(except);
121: }
122: }
123:
124: public void endElement(String tagName) throws SAXException {
125: try {
126: endElementIO(tagName);
127: } catch (IOException except) {
128: throw new SAXException(except);
129: }
130: }
131:
132: public void endElementIO(String tagName) throws IOException {
133: ElementState state;
134:
135: // Works much like content() with additions for closing
136: // an element. Note the different checks for the closed
137: // element's state and the parent element's state.
138: state = getElementState();
139: // Leave the element state and update that of the parent
140: // (if we're not root) to not empty and after element.
141: state = leaveElementState();
142: state.afterElement = true;
143: state.empty = false;
144: if (isDocumentState())
145: _printer.flush();
146: }
147:
148: public void processingInstructionIO(String target, String code)
149: throws IOException {
150: }
151:
152: public void comment(String text) {
153: }
154:
155: public void comment(char[] chars, int start, int length) {
156: }
157:
158: public void characters(char[] chars, int start, int length)
159: throws SAXException {
160: ElementState state;
161:
162: try {
163: state = content();
164: state.doCData = state.inCData = false;
165: printText(chars, start, length, true, true);
166: } catch (IOException except) {
167: throw new SAXException(except);
168: }
169: }
170:
171: protected void characters(String text, boolean unescaped)
172: throws IOException {
173: ElementState state;
174:
175: state = content();
176: state.doCData = state.inCData = false;
177: printText(text, true, true);
178: }
179:
180: //------------------------------------------//
181: // Generic node serializing methods methods //
182: //------------------------------------------//
183:
184: /**
185: * Called to serialize the document's DOCTYPE by the root element.
186: * <p>
187: * This method will check if it has not been called before ({@link #_started}),
188: * will serialize the document type declaration, and will serialize all
189: * pre-root comments and PIs that were accumulated in the document
190: * (see {@link #serializePreRoot}). Pre-root will be serialized even if
191: * this is not the first root element of the document.
192: */
193: protected void startDocument(String rootTagName) throws IOException {
194: // Required to stop processing the DTD, even though the DTD
195: // is not printed.
196: _printer.leaveDTD();
197:
198: _started = true;
199: // Always serialize these, even if not te first root element.
200: serializePreRoot();
201: }
202:
203: /**
204: * Called to serialize a DOM element. Equivalent to calling {@link
205: * #startElement}, {@link #endElement} and serializing everything
206: * inbetween, but better optimized.
207: */
208: protected void serializeElement(Element elem) throws IOException {
209: Node child;
210: ElementState state;
211: boolean preserveSpace;
212: String tagName;
213:
214: tagName = elem.getTagName();
215: state = getElementState();
216: if (isDocumentState()) {
217: // If this is the root element handle it differently.
218: // If the first root element in the document, serialize
219: // the document's DOCTYPE. Space preserving defaults
220: // to that of the output format.
221: if (!_started)
222: startDocument(tagName);
223: }
224: // For any other element, if first in parent, then
225: // use the parnet's space preserving.
226: preserveSpace = state.preserveSpace;
227:
228: // Do not change the current element state yet.
229: // This only happens in endElement().
230:
231: // Ignore all other attributes of the element, only printing
232: // its contents.
233:
234: // If element has children, then serialize them, otherwise
235: // serialize en empty tag.
236: if (elem.hasChildNodes()) {
237: // Enter an element state, and serialize the children
238: // one by one. Finally, end the element.
239: state = enterElementState(null, null, tagName,
240: preserveSpace);
241: child = elem.getFirstChild();
242: while (child != null) {
243: serializeNode(child);
244: child = child.getNextSibling();
245: }
246: endElementIO(tagName);
247: } else {
248: if (!isDocumentState()) {
249: // After element but parent element is no longer empty.
250: state.afterElement = true;
251: state.empty = false;
252: }
253: }
254: }
255:
256: /**
257: * Serialize the DOM node. This method is unique to the Text serializer.
258: *
259: * @param node The node to serialize
260: */
261: protected void serializeNode(Node node) throws IOException {
262: // Based on the node type call the suitable SAX handler.
263: // Only comments entities and documents which are not
264: // handled by SAX are serialized directly.
265: switch (node.getNodeType()) {
266: case Node.TEXT_NODE: {
267: String text;
268:
269: text = node.getNodeValue();
270: if (text != null)
271: characters(node.getNodeValue(), true);
272: break;
273: }
274:
275: case Node.CDATA_SECTION_NODE: {
276: String text;
277:
278: text = node.getNodeValue();
279: if (text != null)
280: characters(node.getNodeValue(), true);
281: break;
282: }
283:
284: case Node.COMMENT_NODE:
285: break;
286:
287: case Node.ENTITY_REFERENCE_NODE:
288: // Ignore.
289: break;
290:
291: case Node.PROCESSING_INSTRUCTION_NODE:
292: break;
293:
294: case Node.ELEMENT_NODE:
295: serializeElement((Element) node);
296: break;
297:
298: case Node.DOCUMENT_NODE:
299: // !!! Fall through
300: case Node.DOCUMENT_FRAGMENT_NODE: {
301: Node child;
302:
303: // By definition this will happen if the node is a document,
304: // document fragment, etc. Just serialize its contents. It will
305: // work well for other nodes that we do not know how to serialize.
306: child = node.getFirstChild();
307: while (child != null) {
308: serializeNode(child);
309: child = child.getNextSibling();
310: }
311: break;
312: }
313:
314: default:
315: break;
316: }
317: }
318:
319: protected ElementState content() {
320: ElementState state;
321:
322: state = getElementState();
323: if (!isDocumentState()) {
324: // If this is the first content in the element,
325: // change the state to not-empty.
326: if (state.empty)
327: state.empty = false;
328: // Except for one content type, all of them
329: // are not last element. That one content
330: // type will take care of itself.
331: state.afterElement = false;
332: }
333: return state;
334: }
335:
336: protected String getEntityRef(int ch) {
337: return null;
338: }
339:
340: }
|