001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: // Sep 14, 2000:
059: // Fixed problem with namespace handling. Contributed by
060: // David Blondeau <blondeau@intalio.com>
061: // Sep 14, 2000:
// Fixed serializer to report IO exceptions directly, instead of at
// the end of document processing.
064: // Reported by Patrick Higgins <phiggins@transzap.com>
065: // Aug 21, 2000:
066: // Fixed bug in startDocument not calling prepare.
067: // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
068: // Aug 21, 2000:
069: // Added ability to omit DOCTYPE declaration.
070:
071: package org.apache.xml.serialize;
072:
073: import java.io.IOException;
074: import java.io.UnsupportedEncodingException;
075: import java.io.OutputStream;
076: import java.io.Writer;
077: import java.util.Enumeration;
078:
079: import org.w3c.dom.*;
080: import org.xml.sax.DocumentHandler;
081: import org.xml.sax.ContentHandler;
082: import org.xml.sax.AttributeList;
083: import org.xml.sax.Attributes;
084: import org.xml.sax.SAXException;
085: import org.xml.sax.helpers.AttributesImpl;
086:
087: /**
088: * Implements an XML serializer supporting both DOM and SAX pretty
089: * serializing. For usage instructions see {@link Serializer}.
090: * <p>
091: * If an output stream is used, the encoding is taken from the
092: * output format (defaults to <tt>UTF-8</tt>). If a writer is
093: * used, make sure the writer uses the same encoding (if applies)
094: * as specified in the output format.
095: * <p>
096: * The serializer supports both DOM and SAX. DOM serializing is done
097: * by calling {@link #serialize} and SAX serializing is done by firing
098: * SAX events and using the serializer as a document handler.
099: * <p>
* If an I/O exception occurs while serializing, it is reported
* directly rather than being deferred to the end of serializing:
* the SAX element callbacks in this class rethrow it wrapped in a
* {@link org.xml.sax.SAXException}, and DOM element serializing
* propagates the {@link java.io.IOException} itself.
104: * <p>
105: * For elements that are not specified as whitespace preserving,
106: * the serializer will potentially break long text lines at space
107: * boundaries, indent lines, and serialize elements on separate
108: * lines. Line terminators will be regarded as spaces, and
109: * spaces at beginning of line will be stripped.
110: *
111: *
112: * @version $Revision: 1.22.2.1 $ $Date: 2001/11/05 15:30:10 $
113: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
114: * @see Serializer
115: */
116: public class XMLSerializer extends BaseMarkupSerializer {
117:
/**
 * Returns the given output format, or a fresh XML output format
 * (<tt>new OutputFormat(Method.XML, null, false)</tt>) when the
 * argument is null.
 *
 * @param format The requested output format, may be null
 * @return <tt>format</tt> itself if non-null, otherwise a new default
 */
private static OutputFormat formatOrDefault(OutputFormat format) {
    if (format == null) {
        return new OutputFormat(Method.XML, null, false);
    }
    return format;
}

/**
 * Creates a serializer with a default XML output format and no
 * output target. Call {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} before using it.
 */
public XMLSerializer() {
    super(formatOrDefault(null));
}

/**
 * Creates a serializer with the given output format and no output
 * target. Call {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} before using it. The method of the
 * serializer's format is set to XML.
 *
 * @param format The output format to use, null for the default
 */
public XMLSerializer(OutputFormat format) {
    super(formatOrDefault(format));
    _format.setMethod(Method.XML);
}

/**
 * Creates a serializer that writes to the given writer using the
 * given output format. The method of the serializer's format is
 * set to XML.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 */
public XMLSerializer(Writer writer, OutputFormat format) {
    super(formatOrDefault(format));
    _format.setMethod(Method.XML);
    setOutputCharStream(writer);
}

/**
 * Creates a serializer that writes to the given output stream using
 * the given output format. The method of the serializer's format is
 * set to XML.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 */
public XMLSerializer(OutputStream output, OutputFormat format) {
    super(formatOrDefault(format));
    _format.setMethod(Method.XML);
    setOutputByteStream(output);
}

/**
 * Replaces the output format of this serializer. A null argument
 * selects a new default XML output format.
 *
 * @param format The output format to use, null for the default
 */
public void setOutputFormat(OutputFormat format) {
    super.setOutputFormat(formatOrDefault(format));
}
172:
173: //-----------------------------------------//
174: // SAX content handler serializing methods //
175: //-----------------------------------------//
176:
177: public void startElement( String namespaceURI, String localName,
178: String rawName, Attributes attrs )
179: throws SAXException
180: {
181: int i;
182: boolean preserveSpace;
183: ElementState state;
184: String name;
185: String value;
186: boolean addNSAttr = false;
187:
188: try {
189: if ( _printer == null )
190: throw new IllegalStateException( "SER002 No writer supplied for serializer" );
191:
192: state = getElementState();
193: if ( isDocumentState() ) {
194: // If this is the root element handle it differently.
195: // If the first root element in the document, serialize
196: // the document's DOCTYPE. Space preserving defaults
197: // to that of the output format.
198: if ( ! _started )
199: startDocument( ( localName == null || localName.length() == 0 ) ? rawName : localName );
200: } else {
201: // For any other element, if first in parent, then
202: // close parent's opening tag and use the parnet's
203: // space preserving.
204: if ( state.empty )
205: _printer.printText( '>' );
206: // Must leave CData section first
207: if ( state.inCData )
208: {
209: _printer.printText( "]]>" );
210: state.inCData = false;
211: }
212: // Indent this element on a new line if the first
213: // content of the parent element or immediately
214: // following an element or a comment
215: if ( _indenting && ! state.preserveSpace &&
216: ( state.empty || state.afterElement || state.afterComment) )
217: _printer.breakLine();
218: }
219: preserveSpace = state.preserveSpace;
220:
221: //We remove the namespaces from the attributes list so that they will
222: //be in _prefixes
223: attrs = extractNamespaces(attrs);
224:
225: // Do not change the current element state yet.
226: // This only happens in endElement().
227: if ( rawName == null || rawName.length() == 0 ) {
228: if ( localName == null )
229: throw new SAXException( "No rawName and localName is null" );
230: if ( namespaceURI != null && ! namespaceURI.equals( "" ) ) {
231: String prefix;
232: prefix = getPrefix( namespaceURI );
233: if ( prefix != null && prefix.length() > 0 )
234: rawName = prefix + ":" + localName;
235: else
236: rawName = localName;
237: } else
238: rawName = localName;
239: addNSAttr = true;
240: }
241:
242: _printer.printText( '<' );
243: _printer.printText( rawName );
244: _printer.indent();
245:
246: // For each attribute print it's name and value as one part,
247: // separated with a space so the element can be broken on
248: // multiple lines.
249: if ( attrs != null ) {
250: for ( i = 0 ; i < attrs.getLength() ; ++i ) {
251: _printer.printSpace();
252:
253: name = attrs.getQName( i );
254: if ( name != null && name.length() == 0 ) {
255: String prefix;
256: String attrURI;
257:
258: name = attrs.getLocalName( i );
259: attrURI = attrs.getURI( i );
260: if ( ( attrURI != null && attrURI.length() != 0 ) &&
261: ( namespaceURI == null || namespaceURI.length() == 0 ||
262: ! attrURI.equals( namespaceURI ) ) ) {
263: prefix = getPrefix( attrURI );
264: if ( prefix != null && prefix.length() > 0 )
265: name = prefix + ":" + name;
266: }
267: }
268:
269: value = attrs.getValue( i );
270: if ( value == null )
271: value = "";
272: _printer.printText( name );
273: _printer.printText( "=\"" );
274: printEscaped( value );
275: _printer.printText( '"' );
276:
277: // If the attribute xml:space exists, determine whether
278: // to preserve spaces in this and child nodes based on
279: // its value.
280: if ( name.equals( "xml:space" ) ) {
281: if ( value.equals( "preserve" ) )
282: preserveSpace = true;
283: else
284: preserveSpace = _format.getPreserveSpace();
285: }
286: }
287: }
288:
289: if ( _prefixes != null ) {
290: Enumeration enum;
291:
292: enum = _prefixes.keys();
293: while ( enum.hasMoreElements() ) {
294: _printer.printSpace();
295: value = (String) enum.nextElement();
296: name = (String) _prefixes.get( value );
297: if ( name.length() == 0 ) {
298: _printer.printText( "xmlns=\"" );
299: printEscaped( value );
300: _printer.printText( '"' );
301: } else {
302: _printer.printText( "xmlns:" );
303: _printer.printText( name );
304: _printer.printText( "=\"" );
305: printEscaped( value );
306: _printer.printText( '"' );
307: }
308: }
309: }
310:
311: // Now it's time to enter a new element state
312: // with the tag name and space preserving.
313: // We still do not change the curent element state.
314: state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
315: name = ( localName == null || localName.length() == 0 ) ? rawName : namespaceURI + "^" + localName;
316: state.doCData = _format.isCDataElement( name );
317: state.unescaped = _format.isNonEscapingElement( name );
318: } catch ( IOException except ) {
319: throw new SAXException( except );
320: }
321: }
322:
323: public void endElement(String namespaceURI, String localName,
324: String rawName) throws SAXException {
325: try {
326: endElementIO(namespaceURI, localName, rawName);
327: } catch (IOException except) {
328: throw new SAXException(except);
329: }
330: }
331:
332: public void endElementIO(String namespaceURI, String localName,
333: String rawName) throws IOException {
334: ElementState state;
335:
336: // Works much like content() with additions for closing
337: // an element. Note the different checks for the closed
338: // element's state and the parent element's state.
339: _printer.unindent();
340: state = getElementState();
341: if (state.empty) {
342: _printer.printText("/>");
343: } else {
344: // Must leave CData section first
345: if (state.inCData)
346: _printer.printText("]]>");
347: // This element is not empty and that last content was
348: // another element, so print a line break before that
349: // last element and this element's closing tag.
350: if (_indenting && !state.preserveSpace
351: && (state.afterElement || state.afterComment))
352: _printer.breakLine();
353: _printer.printText("</");
354: _printer.printText(state.rawName);
355: _printer.printText('>');
356: }
357: // Leave the element state and update that of the parent
358: // (if we're not root) to not empty and after element.
359: state = leaveElementState();
360: state.afterElement = true;
361: state.afterComment = false;
362: state.empty = false;
363: if (isDocumentState())
364: _printer.flush();
365: }
366:
367: //------------------------------------------//
368: // SAX document handler serializing methods //
369: //------------------------------------------//
370:
371: public void startElement(String tagName, AttributeList attrs)
372: throws SAXException {
373: int i;
374: boolean preserveSpace;
375: ElementState state;
376: String name;
377: String value;
378:
379: try {
380: if (_printer == null)
381: throw new IllegalStateException(
382: "SER002 No writer supplied for serializer");
383:
384: state = getElementState();
385: if (isDocumentState()) {
386: // If this is the root element handle it differently.
387: // If the first root element in the document, serialize
388: // the document's DOCTYPE. Space preserving defaults
389: // to that of the output format.
390: if (!_started)
391: startDocument(tagName);
392: } else {
393: // For any other element, if first in parent, then
394: // close parent's opening tag and use the parnet's
395: // space preserving.
396: if (state.empty)
397: _printer.printText('>');
398: // Must leave CData section first
399: if (state.inCData) {
400: _printer.printText("]]>");
401: state.inCData = false;
402: }
403: // Indent this element on a new line if the first
404: // content of the parent element or immediately
405: // following an element.
406: if (_indenting
407: && !state.preserveSpace
408: && (state.empty || state.afterElement || state.afterComment))
409: _printer.breakLine();
410: }
411: preserveSpace = state.preserveSpace;
412:
413: // Do not change the current element state yet.
414: // This only happens in endElement().
415:
416: _printer.printText('<');
417: _printer.printText(tagName);
418: _printer.indent();
419:
420: // For each attribute print it's name and value as one part,
421: // separated with a space so the element can be broken on
422: // multiple lines.
423: if (attrs != null) {
424: for (i = 0; i < attrs.getLength(); ++i) {
425: _printer.printSpace();
426: name = attrs.getName(i);
427: value = attrs.getValue(i);
428: if (value != null) {
429: _printer.printText(name);
430: _printer.printText("=\"");
431: printEscaped(value);
432: _printer.printText('"');
433: }
434:
435: // If the attribute xml:space exists, determine whether
436: // to preserve spaces in this and child nodes based on
437: // its value.
438: if (name.equals("xml:space")) {
439: if (value.equals("preserve"))
440: preserveSpace = true;
441: else
442: preserveSpace = _format.getPreserveSpace();
443: }
444: }
445: }
446: // Now it's time to enter a new element state
447: // with the tag name and space preserving.
448: // We still do not change the curent element state.
449: state = enterElementState(null, null, tagName,
450: preserveSpace);
451: state.doCData = _format.isCDataElement(tagName);
452: state.unescaped = _format.isNonEscapingElement(tagName);
453: } catch (IOException except) {
454: throw new SAXException(except);
455: }
456:
457: }
458:
459: public void endElement(String tagName) throws SAXException {
460: endElement(null, null, tagName);
461: }
462:
//----------------------------------//
// Generic node serializing methods //
//----------------------------------//
466:
467: /**
468: * Called to serialize the document's DOCTYPE by the root element.
469: * The document type declaration must name the root element,
470: * but the root element is only known when that element is serialized,
471: * and not at the start of the document.
472: * <p>
473: * This method will check if it has not been called before ({@link #_started}),
474: * will serialize the document type declaration, and will serialize all
475: * pre-root comments and PIs that were accumulated in the document
476: * (see {@link #serializePreRoot}). Pre-root will be serialized even if
477: * this is not the first root element of the document.
478: */
479: protected void startDocument(String rootTagName) throws IOException {
480: int i;
481: String dtd;
482:
483: dtd = _printer.leaveDTD();
484: if (!_started) {
485:
486: if (!_format.getOmitXMLDeclaration()) {
487: StringBuffer buffer;
488:
489: // Serialize the document declaration appreaing at the head
490: // of very XML document (unless asked not to).
491: buffer = new StringBuffer("<?xml version=\"");
492: if (_format.getVersion() != null)
493: buffer.append(_format.getVersion());
494: else
495: buffer.append("1.0");
496: buffer.append('"');
497: if (_format.getEncoding() != null) {
498: buffer.append(" encoding=\"");
499: buffer.append(_format.getEncoding());
500: buffer.append('"');
501: }
502: if (_format.getStandalone() && _docTypeSystemId == null
503: && _docTypePublicId == null)
504: buffer.append(" standalone=\"yes\"");
505: buffer.append("?>");
506: _printer.printText(buffer);
507: _printer.breakLine();
508: }
509:
510: if (!_format.getOmitDocumentType()) {
511: if (_docTypeSystemId != null) {
512: // System identifier must be specified to print DOCTYPE.
513: // If public identifier is specified print 'PUBLIC
514: // <public> <system>', if not, print 'SYSTEM <system>'.
515: _printer.printText("<!DOCTYPE ");
516: _printer.printText(rootTagName);
517: if (_docTypePublicId != null) {
518: _printer.printText(" PUBLIC ");
519: printDoctypeURL(_docTypePublicId);
520: if (_indenting) {
521: _printer.breakLine();
522: for (i = 0; i < 18 + rootTagName.length(); ++i)
523: _printer.printText(" ");
524: } else
525: _printer.printText(" ");
526: printDoctypeURL(_docTypeSystemId);
527: } else {
528: _printer.printText(" SYSTEM ");
529: printDoctypeURL(_docTypeSystemId);
530: }
531:
532: // If we accumulated any DTD contents while printing.
533: // this would be the place to print it.
534: if (dtd != null && dtd.length() > 0) {
535: _printer.printText(" [");
536: printText(dtd, true, true);
537: _printer.printText(']');
538: }
539:
540: _printer.printText(">");
541: _printer.breakLine();
542: } else if (dtd != null && dtd.length() > 0) {
543: _printer.printText("<!DOCTYPE ");
544: _printer.printText(rootTagName);
545: _printer.printText(" [");
546: printText(dtd, true, true);
547: _printer.printText("]>");
548: _printer.breakLine();
549: }
550: }
551: }
552: _started = true;
553: // Always serialize these, even if not te first root element.
554: serializePreRoot();
555: }
556:
557: /**
558: * Called to serialize a DOM element. Equivalent to calling {@link
559: * #startElement}, {@link #endElement} and serializing everything
560: * inbetween, but better optimized.
561: */
562: protected void serializeElement(Element elem) throws IOException {
563: Attr attr;
564: NamedNodeMap attrMap;
565: int i;
566: Node child;
567: ElementState state;
568: boolean preserveSpace;
569: String name;
570: String value;
571: String tagName;
572:
573: tagName = elem.getTagName();
574: state = getElementState();
575: if (isDocumentState()) {
576: // If this is the root element handle it differently.
577: // If the first root element in the document, serialize
578: // the document's DOCTYPE. Space preserving defaults
579: // to that of the output format.
580: if (!_started)
581: startDocument(tagName);
582: } else {
583: // For any other element, if first in parent, then
584: // close parent's opening tag and use the parnet's
585: // space preserving.
586: if (state.empty)
587: _printer.printText('>');
588: // Must leave CData section first
589: if (state.inCData) {
590: _printer.printText("]]>");
591: state.inCData = false;
592: }
593: // Indent this element on a new line if the first
594: // content of the parent element or immediately
595: // following an element.
596: if (_indenting
597: && !state.preserveSpace
598: && (state.empty || state.afterElement || state.afterComment))
599: _printer.breakLine();
600: }
601: preserveSpace = state.preserveSpace;
602:
603: // Do not change the current element state yet.
604: // This only happens in endElement().
605:
606: _printer.printText('<');
607: _printer.printText(tagName);
608: _printer.indent();
609:
610: // Lookup the element's attribute, but only print specified
611: // attributes. (Unspecified attributes are derived from the DTD.
612: // For each attribute print it's name and value as one part,
613: // separated with a space so the element can be broken on
614: // multiple lines.
615: attrMap = elem.getAttributes();
616: if (attrMap != null) {
617: for (i = 0; i < attrMap.getLength(); ++i) {
618: attr = (Attr) attrMap.item(i);
619: name = attr.getName();
620: value = attr.getValue();
621: if (value == null)
622: value = "";
623: if (attr.getSpecified()) {
624: _printer.printSpace();
625: _printer.printText(name);
626: _printer.printText("=\"");
627: printEscaped(value);
628: _printer.printText('"');
629: }
630: // If the attribute xml:space exists, determine whether
631: // to preserve spaces in this and child nodes based on
632: // its value.
633: if (name.equals("xml:space")) {
634: if (value.equals("preserve"))
635: preserveSpace = true;
636: else
637: preserveSpace = _format.getPreserveSpace();
638: }
639: }
640: }
641:
642: // If element has children, then serialize them, otherwise
643: // serialize en empty tag.
644: if (elem.hasChildNodes()) {
645: // Enter an element state, and serialize the children
646: // one by one. Finally, end the element.
647: state = enterElementState(null, null, tagName,
648: preserveSpace);
649: state.doCData = _format.isCDataElement(tagName);
650: state.unescaped = _format.isNonEscapingElement(tagName);
651: child = elem.getFirstChild();
652: while (child != null) {
653: serializeNode(child);
654: child = child.getNextSibling();
655: }
656: endElementIO(null, null, tagName);
657: } else {
658: _printer.unindent();
659: _printer.printText("/>");
660: // After element but parent element is no longer empty.
661: state.afterElement = true;
662: state.afterComment = false;
663: state.empty = false;
664: if (isDocumentState())
665: _printer.flush();
666: }
667: }
668:
669: protected String getEntityRef(int ch) {
670: // Encode special XML characters into the equivalent character references.
671: // These five are defined by default for all XML documents.
672: switch (ch) {
673: case '<':
674: return "lt";
675: case '>':
676: return "gt";
677: case '"':
678: return "quot";
679: case '\'':
680: return "apos";
681: case '&':
682: return "amp";
683: }
684: return null;
685: }
686:
687: /** Retrieve and remove the namespaces declarations from the list of attributes.
688: *
689: */
690: private Attributes extractNamespaces(Attributes attrs)
691: throws SAXException {
692: AttributesImpl attrsOnly;
693: String rawName;
694: int i;
695: int indexColon;
696: String prefix;
697: int length;
698:
699: length = attrs.getLength();
700: attrsOnly = new AttributesImpl(attrs);
701:
702: for (i = length - 1; i >= 0; --i) {
703: rawName = attrsOnly.getQName(i);
704:
705: //We have to exclude the namespaces declarations from the attributes
706: //Append only when the feature http://xml.org/sax/features/namespace-prefixes"
707: //is TRUE
708: if (rawName.startsWith("xmlns")) {
709: if (rawName.length() == 5) {
710: startPrefixMapping("", attrs.getValue(i));
711: attrsOnly.removeAttribute(i);
712: } else if (rawName.charAt(5) == ':') {
713: startPrefixMapping(rawName.substring(6), attrs
714: .getValue(i));
715: attrsOnly.removeAttribute(i);
716: }
717: }
718: }
719: return attrsOnly;
720: }
721: }
|