001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: // Sep 14, 2000:
// Fixed serializer to report IO exceptions directly, instead of at
// the end of document processing.
061: // Reported by Patrick Higgins <phiggins@transzap.com>
062: // Aug 21, 2000:
063: // Fixed bug in startDocument not calling prepare.
064: // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
065: // Aug 21, 2000:
066: // Added ability to omit DOCTYPE declaration.
067: // Sep 1, 2000:
068: // If no output format is provided the serializer now defaults
069: // to ISO-8859-1 encoding. Reported by Mikael Staldal
070: // <d96-mst@d.kth.se>
071:
072: package org.apache.xml.serialize;
073:
074: import java.io.IOException;
075: import java.io.UnsupportedEncodingException;
076: import java.io.OutputStream;
077: import java.io.Writer;
078: import java.util.Enumeration;
079:
080: import org.w3c.dom.Element;
081: import org.w3c.dom.Attr;
082: import org.w3c.dom.Node;
083: import org.w3c.dom.NamedNodeMap;
084: import org.xml.sax.DocumentHandler;
085: import org.xml.sax.ContentHandler;
086: import org.xml.sax.AttributeList;
087: import org.xml.sax.Attributes;
088: import org.xml.sax.SAXException;
089:
090: /**
091: * Implements an HTML/XHTML serializer supporting both DOM and SAX
092: * pretty serializing. HTML/XHTML mode is determined in the
093: * constructor. For usage instructions see {@link Serializer}.
094: * <p>
095: * If an output stream is used, the encoding is taken from the
096: * output format (defaults to <tt>UTF-8</tt>). If a writer is
097: * used, make sure the writer uses the same encoding (if applies)
098: * as specified in the output format.
099: * <p>
100: * The serializer supports both DOM and SAX. DOM serializing is done
101: * by calling {@link #serialize} and SAX serializing is done by firing
102: * SAX events and using the serializer as a document handler.
103: * <p>
104: * If an I/O exception occurs while serializing, the serializer
105: * will not throw an exception directly, but only throw it
106: * at the end of serializing (either DOM or SAX's {@link
107: * org.xml.sax.DocumentHandler#endDocument}.
108: * <p>
109: * For elements that are not specified as whitespace preserving,
110: * the serializer will potentially break long text lines at space
111: * boundaries, indent lines, and serialize elements on separate
112: * lines. Line terminators will be regarded as spaces, and
113: * spaces at beginning of line will be stripped.
114: * <p>
115: * XHTML is slightly different than HTML:
116: * <ul>
117: * <li>Element/attribute names are lower case and case matters
118: * <li>Attributes must specify value, even if empty string
119: * <li>Empty elements must have '/' in empty tag
120: * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
121: * </ul>
122: *
123: *
124: * @version $Revision: 1.15.2.1 $ $Date: 2001/11/07 19:00:34 $
125: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
126: * @see Serializer
127: */
128: public class HTMLSerializer extends BaseMarkupSerializer {
129:
130: /**
131: * True if serializing in XHTML format.
132: */
133: private static boolean _xhtml;
134:
135: public static String XHTMLNamespace = "";
136:
137: /**
138: * Constructs a new HTML/XHTML serializer depending on the value of
139: * <tt>xhtml</tt>. The serializer cannot be used without calling
140: * {@link #init} first.
141: *
142: * @param xhtml True if XHTML serializing
143: */
144: protected HTMLSerializer(boolean xhtml, OutputFormat format) {
145: super (format);
146: _xhtml = xhtml;
147: }
148:
149: /**
150: * Constructs a new serializer. The serializer cannot be used without
151: * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
152: * first.
153: */
154: public HTMLSerializer() {
155: this (false, new OutputFormat(Method.HTML, "ISO-8859-1", false));
156: }
157:
158: /**
159: * Constructs a new serializer. The serializer cannot be used without
160: * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
161: * first.
162: */
163: public HTMLSerializer(OutputFormat format) {
164: this (false, format != null ? format : new OutputFormat(
165: Method.HTML, "ISO-8859-1", false));
166: }
167:
168: /**
169: * Constructs a new serializer that writes to the specified writer
170: * using the specified output format. If <tt>format</tt> is null,
171: * will use a default output format.
172: *
173: * @param writer The writer to use
174: * @param format The output format to use, null for the default
175: */
176: public HTMLSerializer(Writer writer, OutputFormat format) {
177: this (false, format != null ? format : new OutputFormat(
178: Method.HTML, "ISO-8859-1", false));
179: setOutputCharStream(writer);
180: }
181:
182: /**
183: * Constructs a new serializer that writes to the specified output
184: * stream using the specified output format. If <tt>format</tt>
185: * is null, will use a default output format.
186: *
187: * @param output The output stream to use
188: * @param format The output format to use, null for the default
189: */
190: public HTMLSerializer(OutputStream output, OutputFormat format) {
191: this (false, format != null ? format : new OutputFormat(
192: Method.HTML, "ISO-8859-1", false));
193: setOutputByteStream(output);
194: }
195:
196: public void setOutputFormat(OutputFormat format) {
197: super .setOutputFormat(format != null ? format
198: : new OutputFormat(Method.HTML, "ISO-8859-1", false));
199: }
200:
201: //-----------------------------------------//
202: // SAX content handler serializing methods //
203: //-----------------------------------------//
204:
205: public void startElement( String namespaceURI, String localName,
206: String rawName, Attributes attrs )
207: throws SAXException
208: {
209: int i;
210: boolean preserveSpace;
211: ElementState state;
212: String name;
213: String value;
214: String htmlName;
215: boolean addNSAttr = false;
216:
217: try {
218: if ( _printer == null )
219: throw new IllegalStateException( "SER002 No writer supplied for serializer" );
220:
221: state = getElementState();
222: if ( isDocumentState() ) {
223: // If this is the root element handle it differently.
224: // If the first root element in the document, serialize
225: // the document's DOCTYPE. Space preserving defaults
226: // to that of the output format.
227: if ( ! _started )
228: startDocument( localName == null ? rawName : localName );
229: } else {
230: // For any other element, if first in parent, then
231: // close parent's opening tag and use the parnet's
232: // space preserving.
233: if ( state.empty )
234: _printer.printText( '>' );
235: // Indent this element on a new line if the first
236: // content of the parent element or immediately
237: // following an element.
238: if ( _indenting && ! state.preserveSpace &&
239: ( state.empty || state.afterElement ) )
240: _printer.breakLine();
241: }
242: preserveSpace = state.preserveSpace;
243:
244: // Do not change the current element state yet.
245: // This only happens in endElement().
246:
247: if ( rawName == null ) {
248: rawName = localName;
249: if ( namespaceURI != null ) {
250: String prefix;
251: prefix = getPrefix( namespaceURI );
252: if ( prefix.length() > 0 )
253: rawName = prefix + ":" + localName;
254: }
255: addNSAttr = true;
256: }
257: if ( namespaceURI == null )
258: htmlName = rawName;
259: else {
260: if ( namespaceURI.equals( XHTMLNamespace ) )
261: htmlName = localName;
262: else
263: htmlName = null;
264: }
265:
266: // XHTML: element names are lower case, DOM will be different
267: _printer.printText( '<' );
268: if ( _xhtml )
269: _printer.printText( rawName.toLowerCase() );
270: else
271: _printer.printText( rawName );
272: _printer.indent();
273:
274: // For each attribute serialize it's name and value as one part,
275: // separated with a space so the element can be broken on
276: // multiple lines.
277: if ( attrs != null ) {
278: for ( i = 0 ; i < attrs.getLength() ; ++i ) {
279: _printer.printSpace();
280: name = attrs.getQName( i ).toLowerCase();;
281: value = attrs.getValue( i );
282: if ( _xhtml || namespaceURI != null ) {
283: // XHTML: print empty string for null values.
284: if ( value == null ) {
285: _printer.printText( name );
286: _printer.printText( "=\"\"" );
287: } else {
288: _printer.printText( name );
289: _printer.printText( "=\"" );
290: printEscaped( value );
291: _printer.printText( '"' );
292: }
293: } else {
294: // HTML: Empty values print as attribute name, no value.
295: // HTML: URI attributes will print unescaped
296: if ( value == null ) {
297: value = "";
298: }
299: if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
300: _printer.printText( name );
301: else if ( HTMLdtd.isURI( rawName, name ) ) {
302: _printer.printText( name );
303: _printer.printText( "=\"" );
304: _printer.printText( escapeURI( value ) );
305: _printer.printText( '"' );
306: } else if ( HTMLdtd.isBoolean( rawName, name ) )
307: _printer.printText( name );
308: else {
309: _printer.printText( name );
310: _printer.printText( "=\"" );
311: printEscaped( value );
312: _printer.printText( '"' );
313: }
314: }
315: }
316: }
317: if ( htmlName != null && HTMLdtd.isPreserveSpace( htmlName ) )
318: preserveSpace = true;
319:
320: if ( addNSAttr ) {
321: Enumeration enum;
322:
323: enum = _prefixes.keys();
324: while ( enum.hasMoreElements() ) {
325: _printer.printSpace();
326: value = (String) enum.nextElement();
327: name = (String) _prefixes.get( value );
328: if ( name.length() == 0 ) {
329: _printer.printText( "xmlns=\"" );
330: printEscaped( value );
331: _printer.printText( '"' );
332: } else {
333: _printer.printText( "xmlns:" );
334: _printer.printText( name );
335: _printer.printText( "=\"" );
336: printEscaped( value );
337: _printer.printText( '"' );
338: }
339: }
340: }
341:
342: // Now it's time to enter a new element state
343: // with the tag name and space preserving.
344: // We still do not change the curent element state.
345: state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
346:
347: // Prevents line breaks inside A/TD
348:
349: if ( htmlName != null && ( htmlName.equalsIgnoreCase( "A" ) ||
350: htmlName.equalsIgnoreCase( "TD" ) ) ) {
351: state.empty = false;
352: _printer.printText( '>' );
353: }
354:
355: // Handle SCRIPT and STYLE specifically by changing the
356: // state of the current element to CDATA (XHTML) or
357: // unescaped (HTML).
358: if ( htmlName != null && ( rawName.equalsIgnoreCase( "SCRIPT" ) ||
359: rawName.equalsIgnoreCase( "STYLE" ) ) ) {
360: if ( _xhtml ) {
361: // XHTML: Print contents as CDATA section
362: state.doCData = true;
363: } else {
364: // HTML: Print contents unescaped
365: state.unescaped = true;
366: }
367: }
368: } catch ( IOException except ) {
369: throw new SAXException( except );
370: }
371: }
372:
373: public void endElement(String namespaceURI, String localName,
374: String rawName) throws SAXException {
375: try {
376: endElementIO(namespaceURI, localName, rawName);
377: } catch (IOException except) {
378: throw new SAXException(except);
379: }
380: }
381:
382: public void endElementIO(String namespaceURI, String localName,
383: String rawName) throws IOException {
384: ElementState state;
385: String htmlName;
386:
387: // Works much like content() with additions for closing
388: // an element. Note the different checks for the closed
389: // element's state and the parent element's state.
390: _printer.unindent();
391: state = getElementState();
392:
393: if (state.namespaceURI == null)
394: htmlName = state.rawName;
395: else {
396: if (state.namespaceURI.equals(XHTMLNamespace))
397: htmlName = state.localName;
398: else
399: htmlName = null;
400: }
401:
402: if (_xhtml) {
403: if (state.empty) {
404: _printer.printText(" />");
405: } else {
406: // Must leave CData section first
407: if (state.inCData)
408: _printer.printText("]]>");
409: // XHTML: element names are lower case, DOM will be different
410: _printer.printText("</");
411: _printer.printText(state.rawName.toLowerCase());
412: _printer.printText('>');
413: }
414: } else {
415: if (state.empty)
416: _printer.printText('>');
417: // This element is not empty and that last content was
418: // another element, so print a line break before that
419: // last element and this element's closing tag.
420: // [keith] Provided this is not an anchor.
421: // HTML: some elements do not print closing tag (e.g. LI)
422: if (htmlName == null || !HTMLdtd.isOnlyOpening(htmlName)) {
423: if (_indenting && !state.preserveSpace
424: && state.afterElement)
425: _printer.breakLine();
426: // Must leave CData section first (Illegal in HTML, but still)
427: if (state.inCData)
428: _printer.printText("]]>");
429: _printer.printText("</");
430: _printer.printText(state.rawName);
431: _printer.printText('>');
432: }
433: }
434: // Leave the element state and update that of the parent
435: // (if we're not root) to not empty and after element.
436: state = leaveElementState();
437: // Temporary hack to prevent line breaks inside A/TD
438: if (htmlName == null
439: || (!htmlName.equalsIgnoreCase("A") && !htmlName
440: .equalsIgnoreCase("TD")))
441:
442: state.afterElement = true;
443: state.empty = false;
444: if (isDocumentState())
445: _printer.flush();
446: }
447:
448: //------------------------------------------//
449: // SAX document handler serializing methods //
450: //------------------------------------------//
451:
452: public void characters(char[] chars, int start, int length)
453: throws SAXException {
454: ElementState state;
455:
456: try {
457: // HTML: no CDATA section
458: state = content();
459: state.doCData = false;
460: super .characters(chars, start, length);
461: } catch (IOException except) {
462: throw new SAXException(except);
463: }
464: }
465:
466: public void startElement(String tagName, AttributeList attrs)
467: throws SAXException {
468: int i;
469: boolean preserveSpace;
470: ElementState state;
471: String name;
472: String value;
473:
474: try {
475: if (_printer == null)
476: throw new IllegalStateException(
477: "SER002 No writer supplied for serializer");
478:
479: state = getElementState();
480: if (isDocumentState()) {
481: // If this is the root element handle it differently.
482: // If the first root element in the document, serialize
483: // the document's DOCTYPE. Space preserving defaults
484: // to that of the output format.
485: if (!_started)
486: startDocument(tagName);
487: } else {
488: // For any other element, if first in parent, then
489: // close parent's opening tag and use the parnet's
490: // space preserving.
491: if (state.empty)
492: _printer.printText('>');
493: // Indent this element on a new line if the first
494: // content of the parent element or immediately
495: // following an element.
496: if (_indenting && !state.preserveSpace
497: && (state.empty || state.afterElement))
498: _printer.breakLine();
499: }
500: preserveSpace = state.preserveSpace;
501:
502: // Do not change the current element state yet.
503: // This only happens in endElement().
504:
505: // XHTML: element names are lower case, DOM will be different
506: _printer.printText('<');
507: if (_xhtml)
508: _printer.printText(tagName.toLowerCase());
509: else
510: _printer.printText(tagName);
511: _printer.indent();
512:
513: // For each attribute serialize it's name and value as one part,
514: // separated with a space so the element can be broken on
515: // multiple lines.
516: if (attrs != null) {
517: for (i = 0; i < attrs.getLength(); ++i) {
518: _printer.printSpace();
519: name = attrs.getName(i).toLowerCase();
520: ;
521: value = attrs.getValue(i);
522: if (_xhtml) {
523: // XHTML: print empty string for null values.
524: if (value == null) {
525: _printer.printText(name);
526: _printer.printText("=\"\"");
527: } else {
528: _printer.printText(name);
529: _printer.printText("=\"");
530: printEscaped(value);
531: _printer.printText('"');
532: }
533: } else {
534: // HTML: Empty values print as attribute name, no value.
535: // HTML: URI attributes will print unescaped
536: if (value == null) {
537: value = "";
538: }
539: if (!_format.getPreserveEmptyAttributes()
540: && value.length() == 0)
541: _printer.printText(name);
542: else if (HTMLdtd.isURI(tagName, name)) {
543: _printer.printText(name);
544: _printer.printText("=\"");
545: _printer.printText(escapeURI(value));
546: _printer.printText('"');
547: } else if (HTMLdtd.isBoolean(tagName, name))
548: _printer.printText(name);
549: else {
550: _printer.printText(name);
551: _printer.printText("=\"");
552: printEscaped(value);
553: _printer.printText('"');
554: }
555: }
556: }
557: }
558: if (HTMLdtd.isPreserveSpace(tagName))
559: preserveSpace = true;
560:
561: // Now it's time to enter a new element state
562: // with the tag name and space preserving.
563: // We still do not change the curent element state.
564: state = enterElementState(null, null, tagName,
565: preserveSpace);
566:
567: // Prevents line breaks inside A/TD
568: if (tagName.equalsIgnoreCase("A")
569: || tagName.equalsIgnoreCase("TD")) {
570: state.empty = false;
571: _printer.printText('>');
572: }
573:
574: // Handle SCRIPT and STYLE specifically by changing the
575: // state of the current element to CDATA (XHTML) or
576: // unescaped (HTML).
577: if (tagName.equalsIgnoreCase("SCRIPT")
578: || tagName.equalsIgnoreCase("STYLE")) {
579: if (_xhtml) {
580: // XHTML: Print contents as CDATA section
581: state.doCData = true;
582: } else {
583: // HTML: Print contents unescaped
584: state.unescaped = true;
585: }
586: }
587: } catch (IOException except) {
588: throw new SAXException(except);
589: }
590: }
591:
592: public void endElement(String tagName) throws SAXException {
593: endElement(null, null, tagName);
594: }
595:
596: //------------------------------------------//
597: // Generic node serializing methods methods //
598: //------------------------------------------//
599:
600: /**
601: * Called to serialize the document's DOCTYPE by the root element.
602: * The document type declaration must name the root element,
603: * but the root element is only known when that element is serialized,
604: * and not at the start of the document.
605: * <p>
606: * This method will check if it has not been called before ({@link #_started}),
607: * will serialize the document type declaration, and will serialize all
608: * pre-root comments and PIs that were accumulated in the document
609: * (see {@link #serializePreRoot}). Pre-root will be serialized even if
610: * this is not the first root element of the document.
611: */
612: protected void startDocument(String rootTagName) throws IOException {
613: StringBuffer buffer;
614:
615: // Not supported in HTML/XHTML, but we still have to switch
616: // out of DTD mode.
617: _printer.leaveDTD();
618: if (!_started) {
619: // If the public and system identifiers were not specified
620: // in the output format, use the appropriate ones for HTML
621: // or XHTML.
622: if (_docTypePublicId == null && _docTypeSystemId == null) {
623: if (_xhtml) {
624: _docTypePublicId = HTMLdtd.XHTMLPublicId;
625: _docTypeSystemId = HTMLdtd.XHTMLSystemId;
626: } else {
627: _docTypePublicId = HTMLdtd.HTMLPublicId;
628: _docTypeSystemId = HTMLdtd.HTMLSystemId;
629: }
630: }
631:
632: if (!_format.getOmitDocumentType()) {
633: // XHTML: If public idnentifier and system identifier
634: // specified, print them, else print just system identifier
635: // HTML: If public identifier specified, print it with
636: // system identifier, if specified.
637: if (_docTypePublicId != null
638: && (!_xhtml || _docTypeSystemId != null)) {
639: _printer.printText("<!DOCTYPE HTML PUBLIC ");
640: printDoctypeURL(_docTypePublicId);
641: if (_docTypeSystemId != null) {
642: if (_indenting) {
643: _printer.breakLine();
644: _printer
645: .printText(" ");
646: } else
647: _printer.printText(' ');
648: printDoctypeURL(_docTypeSystemId);
649: }
650: _printer.printText('>');
651: _printer.breakLine();
652: } else if (_docTypeSystemId != null) {
653: _printer.printText("<!DOCTYPE HTML SYSTEM ");
654: printDoctypeURL(_docTypeSystemId);
655: _printer.printText('>');
656: _printer.breakLine();
657: }
658: }
659: }
660:
661: _started = true;
662: // Always serialize these, even if not te first root element.
663: serializePreRoot();
664: }
665:
666: /**
667: * Called to serialize a DOM element. Equivalent to calling {@link
668: * #startElement}, {@link #endElement} and serializing everything
669: * inbetween, but better optimized.
670: */
671: protected void serializeElement(Element elem) throws IOException {
672: Attr attr;
673: NamedNodeMap attrMap;
674: int i;
675: Node child;
676: ElementState state;
677: boolean preserveSpace;
678: String name;
679: String value;
680: String tagName;
681:
682: tagName = elem.getTagName();
683: state = getElementState();
684: if (isDocumentState()) {
685: // If this is the root element handle it differently.
686: // If the first root element in the document, serialize
687: // the document's DOCTYPE. Space preserving defaults
688: // to that of the output format.
689: if (!_started)
690: startDocument(tagName);
691: } else {
692: // For any other element, if first in parent, then
693: // close parent's opening tag and use the parnet's
694: // space preserving.
695: if (state.empty)
696: _printer.printText('>');
697: // Indent this element on a new line if the first
698: // content of the parent element or immediately
699: // following an element.
700: if (_indenting && !state.preserveSpace
701: && (state.empty || state.afterElement))
702: _printer.breakLine();
703: }
704: preserveSpace = state.preserveSpace;
705:
706: // Do not change the current element state yet.
707: // This only happens in endElement().
708:
709: // XHTML: element names are lower case, DOM will be different
710: _printer.printText('<');
711: if (_xhtml)
712: _printer.printText(tagName.toLowerCase());
713: else
714: _printer.printText(tagName);
715: _printer.indent();
716:
717: // Lookup the element's attribute, but only print specified
718: // attributes. (Unspecified attributes are derived from the DTD.
719: // For each attribute print it's name and value as one part,
720: // separated with a space so the element can be broken on
721: // multiple lines.
722: attrMap = elem.getAttributes();
723: if (attrMap != null) {
724: for (i = 0; i < attrMap.getLength(); ++i) {
725: attr = (Attr) attrMap.item(i);
726: name = attr.getName().toLowerCase();
727: value = attr.getValue();
728: if (attr.getSpecified()) {
729: _printer.printSpace();
730: if (_xhtml) {
731: // XHTML: print empty string for null values.
732: if (value == null) {
733: _printer.printText(name);
734: _printer.printText("=\"\"");
735: } else {
736: _printer.printText(name);
737: _printer.printText("=\"");
738: printEscaped(value);
739: _printer.printText('"');
740: }
741: } else {
742: // HTML: Empty values print as attribute name, no value.
743: // HTML: URI attributes will print unescaped
744: if (value == null) {
745: value = "";
746: }
747: if (!_format.getPreserveEmptyAttributes()
748: && value.length() == 0)
749: _printer.printText(name);
750: else if (HTMLdtd.isURI(tagName, name)) {
751: _printer.printText(name);
752: _printer.printText("=\"");
753: _printer.printText(escapeURI(value));
754: _printer.printText('"');
755: } else if (HTMLdtd.isBoolean(tagName, name))
756: _printer.printText(name);
757: else {
758: _printer.printText(name);
759: _printer.printText("=\"");
760: printEscaped(value);
761: _printer.printText('"');
762: }
763: }
764: }
765: }
766: }
767: if (HTMLdtd.isPreserveSpace(tagName))
768: preserveSpace = true;
769:
770: // If element has children, or if element is not an empty tag,
771: // serialize an opening tag.
772: if (elem.hasChildNodes() || !HTMLdtd.isEmptyTag(tagName)) {
773: // Enter an element state, and serialize the children
774: // one by one. Finally, end the element.
775: state = enterElementState(null, null, tagName,
776: preserveSpace);
777:
778: // Prevents line breaks inside A/TD
779: if (tagName.equalsIgnoreCase("A")
780: || tagName.equalsIgnoreCase("TD")) {
781: state.empty = false;
782: _printer.printText('>');
783: }
784:
785: // Handle SCRIPT and STYLE specifically by changing the
786: // state of the current element to CDATA (XHTML) or
787: // unescaped (HTML).
788: if (tagName.equalsIgnoreCase("SCRIPT")
789: || tagName.equalsIgnoreCase("STYLE")) {
790: if (_xhtml) {
791: // XHTML: Print contents as CDATA section
792: state.doCData = true;
793: } else {
794: // HTML: Print contents unescaped
795: state.unescaped = true;
796: }
797: }
798: child = elem.getFirstChild();
799: while (child != null) {
800: serializeNode(child);
801: child = child.getNextSibling();
802: }
803: endElementIO(null, null, tagName);
804: } else {
805: _printer.unindent();
806: // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
807: // HTML: Empty tags are defined as such in DTD no in document.
808: if (_xhtml)
809: _printer.printText(" />");
810: else
811: _printer.printText('>');
812: // After element but parent element is no longer empty.
813: state.afterElement = true;
814: state.empty = false;
815: if (isDocumentState())
816: _printer.flush();
817: }
818: }
819:
820: protected void characters(String text) throws IOException {
821: ElementState state;
822:
823: // HTML: no CDATA section
824: state = content();
825: super .characters(text);
826: }
827:
828: protected String getEntityRef(int ch) {
829: return HTMLdtd.fromChar(ch);
830: }
831:
832: protected String escapeURI(String uri) {
833: int index;
834:
835: // XXX Apparently Netscape doesn't like if we escape the URI
836: // using %nn, so we leave it as is, just remove any quotes.
837: index = uri.indexOf("\"");
838: if (index >= 0)
839: return uri.substring(0, index);
840: else
841: return uri;
842: }
843:
844: }
|