001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: // Sep 14, 2000:
019: // Fixed serializer to report IO exception directly, instead at
020: // the end of document processing.
021: // Reported by Patrick Higgins <phiggins@transzap.com>
022: // Aug 21, 2000:
023: // Fixed bug in startDocument not calling prepare.
024: // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
025: // Aug 21, 2000:
026: // Added ability to omit DOCTYPE declaration.
027: // Sep 1, 2000:
028: // If no output format is provided the serializer now defaults
029: // to ISO-8859-1 encoding. Reported by Mikael Staldal
030: // <d96-mst@d.kth.se>
031:
032: package org.apache.xml.serialize;
033:
034: import org.apache.xerces.dom.DOMMessageFormatter;
035:
036: import java.io.IOException;
037: import java.io.OutputStream;
038: import java.io.Writer;
039: import java.util.Enumeration;
040: import java.util.Locale;
041:
042: import org.w3c.dom.Attr;
043: import org.w3c.dom.Element;
044: import org.w3c.dom.NamedNodeMap;
045: import org.w3c.dom.Node;
046: import org.xml.sax.AttributeList;
047: import org.xml.sax.Attributes;
048: import org.xml.sax.SAXException;
049:
050: /**
051: * Implements an HTML/XHTML serializer supporting both DOM and SAX
052: * pretty serializing. HTML/XHTML mode is determined in the
053: * constructor. For usage instructions see {@link Serializer}.
054: * <p>
055: * If an output stream is used, the encoding is taken from the
056: * output format (defaults to <tt>UTF-8</tt>). If a writer is
057: * used, make sure the writer uses the same encoding (if applies)
058: * as specified in the output format.
059: * <p>
060: * The serializer supports both DOM and SAX. DOM serializing is done
061: * by calling {@link #serialize} and SAX serializing is done by firing
062: * SAX events and using the serializer as a document handler.
063: * <p>
 * If an I/O exception occurs while serializing, it is reported
 * directly rather than being deferred to the end of serializing:
 * the SAX event methods throw it wrapped in a
 * {@link org.xml.sax.SAXException}, and the DOM serializing methods
 * throw the {@link java.io.IOException} itself.
068: * <p>
069: * For elements that are not specified as whitespace preserving,
070: * the serializer will potentially break long text lines at space
071: * boundaries, indent lines, and serialize elements on separate
072: * lines. Line terminators will be regarded as spaces, and
073: * spaces at beginning of line will be stripped.
074: * <p>
075: * XHTML is slightly different than HTML:
076: * <ul>
077: * <li>Element/attribute names are lower case and case matters
078: * <li>Attributes must specify value, even if empty string
079: * <li>Empty elements must have '/' in empty tag
080: * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
081: * </ul>
082: *
083: * @deprecated This class was deprecated in Xerces 2.6.2. It is
084: * recommended that new applications use JAXP's Transformation API
085: * for XML (TrAX) for serializing HTML. See the Xerces documentation
086: * for more information.
087: * @version $Revision: 464300 $ $Date: 2006-10-15 17:56:26 -0400 (Sun, 15 Oct 2006) $
088: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
089: * @see Serializer
090: */
091: public class HTMLSerializer extends BaseMarkupSerializer {
092:
093: /**
094: * True if serializing in XHTML format.
095: */
096: private boolean _xhtml;
097:
098: public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml";
099:
100: // for users to override XHTMLNamespace if need be.
101: private String fUserXHTMLNamespace = null;
102:
/**
 * Constructs a new HTML or XHTML serializer, depending on the value of
 * <tt>xhtml</tt>. The serializer cannot be used without calling
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
 *
 * @param xhtml True to serialize as XHTML, false to serialize as HTML
 * @param format The output format, handed unchanged to the base serializer
 */
protected HTMLSerializer(boolean xhtml, OutputFormat format) {
    super(format);
    this._xhtml = xhtml;
}
114:
/**
 * Constructs a new HTML (not XHTML) serializer that uses a default output
 * format created for the HTML method with the <tt>ISO-8859-1</tt> encoding.
 * The serializer cannot be used without calling
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
 */
public HTMLSerializer() {
    this(
            false,
            new OutputFormat(Method.HTML, "ISO-8859-1", false));
}
123:
/**
 * Constructs a new HTML (not XHTML) serializer with the given output
 * format. The serializer cannot be used without calling
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
 *
 * @param format The output format to use; when null, a default format for
 *               the HTML method with the <tt>ISO-8859-1</tt> encoding is
 *               created instead
 */
public HTMLSerializer(OutputFormat format) {
    this(
            false,
            format == null
                    ? new OutputFormat(Method.HTML, "ISO-8859-1", false)
                    : format);
}
133:
/**
 * Constructs a new HTML (not XHTML) serializer that writes to the given
 * writer using the given output format.
 *
 * @param writer The writer to use; it is registered through
 *               {@link #setOutputCharStream}
 * @param format The output format to use; when null, a default format for
 *               the HTML method with the <tt>ISO-8859-1</tt> encoding is
 *               created instead
 */
public HTMLSerializer(Writer writer, OutputFormat format) {
    this(
            false,
            format == null
                    ? new OutputFormat(Method.HTML, "ISO-8859-1", false)
                    : format);
    setOutputCharStream(writer);
}
147:
/**
 * Constructs a new HTML (not XHTML) serializer that writes to the given
 * output stream using the given output format.
 *
 * @param output The output stream to use; it is registered through
 *               {@link #setOutputByteStream}
 * @param format The output format to use; when null, a default format for
 *               the HTML method with the <tt>ISO-8859-1</tt> encoding is
 *               created instead
 */
public HTMLSerializer(OutputStream output, OutputFormat format) {
    this(
            false,
            format == null
                    ? new OutputFormat(Method.HTML, "ISO-8859-1", false)
                    : format);
    setOutputByteStream(output);
}
161:
162: public void setOutputFormat(OutputFormat format) {
163: super .setOutputFormat(format != null ? format
164: : new OutputFormat(Method.HTML, "ISO-8859-1", false));
165: }
166:
167: // Set value for alternate XHTML namespace.
168: public void setXHTMLNamespace(String newNamespace) {
169: fUserXHTMLNamespace = newNamespace;
170: } // setXHTMLNamespace(String)
171:
172: //-----------------------------------------//
173: // SAX content handler serializing methods //
174: //-----------------------------------------//
175:
176: public void startElement(String namespaceURI, String localName,
177: String rawName, Attributes attrs) throws SAXException {
178: int i;
179: boolean preserveSpace;
180: ElementState state;
181: String name;
182: String value;
183: String htmlName;
184: boolean addNSAttr = false;
185:
186: try {
187: if (_printer == null)
188: throw new IllegalStateException(DOMMessageFormatter
189: .formatMessage(
190: DOMMessageFormatter.SERIALIZER_DOMAIN,
191: "NoWriterSupplied", null));
192:
193: state = getElementState();
194: if (isDocumentState()) {
195: // If this is the root element handle it differently.
196: // If the first root element in the document, serialize
197: // the document's DOCTYPE. Space preserving defaults
198: // to that of the output format.
199: if (!_started)
200: startDocument((localName == null || localName
201: .length() == 0) ? rawName : localName);
202: } else {
203: // For any other element, if first in parent, then
204: // close parent's opening tag and use the parnet's
205: // space preserving.
206: if (state.empty)
207: _printer.printText('>');
208: // Indent this element on a new line if the first
209: // content of the parent element or immediately
210: // following an element.
211: if (_indenting && !state.preserveSpace
212: && (state.empty || state.afterElement))
213: _printer.breakLine();
214: }
215: preserveSpace = state.preserveSpace;
216:
217: // Do not change the current element state yet.
218: // This only happens in endElement().
219:
220: // As per SAX2, the namespace URI is an empty string if the element has no
221: // namespace URI, or namespaces is turned off. The check against null protects
222: // against broken SAX implementations, so I've left it there. - mrglavas
223: boolean hasNamespaceURI = (namespaceURI != null && namespaceURI
224: .length() != 0);
225:
226: // SAX2: rawName (QName) could be empty string if
227: // namespace-prefixes property is false.
228: if (rawName == null || rawName.length() == 0) {
229: rawName = localName;
230: if (hasNamespaceURI) {
231: String prefix;
232: prefix = getPrefix(namespaceURI);
233: if (prefix != null && prefix.length() != 0)
234: rawName = prefix + ":" + localName;
235: }
236: addNSAttr = true;
237: }
238: if (!hasNamespaceURI)
239: htmlName = rawName;
240: else {
241: if (namespaceURI.equals(XHTMLNamespace)
242: || (fUserXHTMLNamespace != null && fUserXHTMLNamespace
243: .equals(namespaceURI)))
244: htmlName = localName;
245: else
246: htmlName = null;
247: }
248:
249: // XHTML: element names are lower case, DOM will be different
250: _printer.printText('<');
251: if (_xhtml)
252: _printer.printText(rawName.toLowerCase(Locale.ENGLISH));
253: else
254: _printer.printText(rawName);
255: _printer.indent();
256:
257: // For each attribute serialize it's name and value as one part,
258: // separated with a space so the element can be broken on
259: // multiple lines.
260: if (attrs != null) {
261: for (i = 0; i < attrs.getLength(); ++i) {
262: _printer.printSpace();
263: name = attrs.getQName(i)
264: .toLowerCase(Locale.ENGLISH);
265: value = attrs.getValue(i);
266: if (_xhtml || hasNamespaceURI) {
267: // XHTML: print empty string for null values.
268: if (value == null) {
269: _printer.printText(name);
270: _printer.printText("=\"\"");
271: } else {
272: _printer.printText(name);
273: _printer.printText("=\"");
274: printEscaped(value);
275: _printer.printText('"');
276: }
277: } else {
278: // HTML: Empty values print as attribute name, no value.
279: // HTML: URI attributes will print unescaped
280: if (value == null) {
281: value = "";
282: }
283: if (!_format.getPreserveEmptyAttributes()
284: && value.length() == 0)
285: _printer.printText(name);
286: else if (HTMLdtd.isURI(rawName, name)) {
287: _printer.printText(name);
288: _printer.printText("=\"");
289: _printer.printText(escapeURI(value));
290: _printer.printText('"');
291: } else if (HTMLdtd.isBoolean(rawName, name))
292: _printer.printText(name);
293: else {
294: _printer.printText(name);
295: _printer.printText("=\"");
296: printEscaped(value);
297: _printer.printText('"');
298: }
299: }
300: }
301: }
302: if (htmlName != null && HTMLdtd.isPreserveSpace(htmlName))
303: preserveSpace = true;
304:
305: if (addNSAttr) {
306: Enumeration keys;
307:
308: keys = _prefixes.keys();
309: while (keys.hasMoreElements()) {
310: _printer.printSpace();
311: value = (String) keys.nextElement();
312: name = (String) _prefixes.get(value);
313: if (name.length() == 0) {
314: _printer.printText("xmlns=\"");
315: printEscaped(value);
316: _printer.printText('"');
317: } else {
318: _printer.printText("xmlns:");
319: _printer.printText(name);
320: _printer.printText("=\"");
321: printEscaped(value);
322: _printer.printText('"');
323: }
324: }
325: }
326:
327: // Now it's time to enter a new element state
328: // with the tag name and space preserving.
329: // We still do not change the curent element state.
330: state = enterElementState(namespaceURI, localName, rawName,
331: preserveSpace);
332:
333: // Prevents line breaks inside A/TD
334:
335: if (htmlName != null
336: && (htmlName.equalsIgnoreCase("A") || htmlName
337: .equalsIgnoreCase("TD"))) {
338: state.empty = false;
339: _printer.printText('>');
340: }
341:
342: // Handle SCRIPT and STYLE specifically by changing the
343: // state of the current element to CDATA (XHTML) or
344: // unescaped (HTML).
345: if (htmlName != null
346: && (rawName.equalsIgnoreCase("SCRIPT") || rawName
347: .equalsIgnoreCase("STYLE"))) {
348: if (_xhtml) {
349: // XHTML: Print contents as CDATA section
350: state.doCData = true;
351: } else {
352: // HTML: Print contents unescaped
353: state.unescaped = true;
354: }
355: }
356: } catch (IOException except) {
357: throw new SAXException(except);
358: }
359: }
360:
361: public void endElement(String namespaceURI, String localName,
362: String rawName) throws SAXException {
363: try {
364: endElementIO(namespaceURI, localName, rawName);
365: } catch (IOException except) {
366: throw new SAXException(except);
367: }
368: }
369:
370: public void endElementIO(String namespaceURI, String localName,
371: String rawName) throws IOException {
372: ElementState state;
373: String htmlName;
374:
375: // Works much like content() with additions for closing
376: // an element. Note the different checks for the closed
377: // element's state and the parent element's state.
378: _printer.unindent();
379: state = getElementState();
380:
381: if (state.namespaceURI == null
382: || state.namespaceURI.length() == 0)
383: htmlName = state.rawName;
384: else {
385: if (state.namespaceURI.equals(XHTMLNamespace)
386: || (fUserXHTMLNamespace != null && fUserXHTMLNamespace
387: .equals(state.namespaceURI)))
388: htmlName = state.localName;
389: else
390: htmlName = null;
391: }
392:
393: if (_xhtml) {
394: if (state.empty) {
395: _printer.printText(" />");
396: } else {
397: // Must leave CData section first
398: if (state.inCData)
399: _printer.printText("]]>");
400: // XHTML: element names are lower case, DOM will be different
401: _printer.printText("</");
402: _printer.printText(state.rawName
403: .toLowerCase(Locale.ENGLISH));
404: _printer.printText('>');
405: }
406: } else {
407: if (state.empty)
408: _printer.printText('>');
409: // This element is not empty and that last content was
410: // another element, so print a line break before that
411: // last element and this element's closing tag.
412: // [keith] Provided this is not an anchor.
413: // HTML: some elements do not print closing tag (e.g. LI)
414: if (htmlName == null || !HTMLdtd.isOnlyOpening(htmlName)) {
415: if (_indenting && !state.preserveSpace
416: && state.afterElement)
417: _printer.breakLine();
418: // Must leave CData section first (Illegal in HTML, but still)
419: if (state.inCData)
420: _printer.printText("]]>");
421: _printer.printText("</");
422: _printer.printText(state.rawName);
423: _printer.printText('>');
424: }
425: }
426: // Leave the element state and update that of the parent
427: // (if we're not root) to not empty and after element.
428: state = leaveElementState();
429: // Temporary hack to prevent line breaks inside A/TD
430: if (htmlName == null
431: || (!htmlName.equalsIgnoreCase("A") && !htmlName
432: .equalsIgnoreCase("TD")))
433:
434: state.afterElement = true;
435: state.empty = false;
436: if (isDocumentState())
437: _printer.flush();
438: }
439:
440: //------------------------------------------//
441: // SAX document handler serializing methods //
442: //------------------------------------------//
443:
444: public void characters(char[] chars, int start, int length)
445: throws SAXException {
446: ElementState state;
447:
448: try {
449: // HTML: no CDATA section
450: state = content();
451: state.doCData = false;
452: super .characters(chars, start, length);
453: } catch (IOException except) {
454: throw new SAXException(except);
455: }
456: }
457:
458: public void startElement(String tagName, AttributeList attrs)
459: throws SAXException {
460: int i;
461: boolean preserveSpace;
462: ElementState state;
463: String name;
464: String value;
465:
466: try {
467: if (_printer == null)
468: throw new IllegalStateException(DOMMessageFormatter
469: .formatMessage(
470: DOMMessageFormatter.SERIALIZER_DOMAIN,
471: "NoWriterSupplied", null));
472:
473: state = getElementState();
474: if (isDocumentState()) {
475: // If this is the root element handle it differently.
476: // If the first root element in the document, serialize
477: // the document's DOCTYPE. Space preserving defaults
478: // to that of the output format.
479: if (!_started)
480: startDocument(tagName);
481: } else {
482: // For any other element, if first in parent, then
483: // close parent's opening tag and use the parnet's
484: // space preserving.
485: if (state.empty)
486: _printer.printText('>');
487: // Indent this element on a new line if the first
488: // content of the parent element or immediately
489: // following an element.
490: if (_indenting && !state.preserveSpace
491: && (state.empty || state.afterElement))
492: _printer.breakLine();
493: }
494: preserveSpace = state.preserveSpace;
495:
496: // Do not change the current element state yet.
497: // This only happens in endElement().
498:
499: // XHTML: element names are lower case, DOM will be different
500: _printer.printText('<');
501: if (_xhtml)
502: _printer.printText(tagName.toLowerCase(Locale.ENGLISH));
503: else
504: _printer.printText(tagName);
505: _printer.indent();
506:
507: // For each attribute serialize it's name and value as one part,
508: // separated with a space so the element can be broken on
509: // multiple lines.
510: if (attrs != null) {
511: for (i = 0; i < attrs.getLength(); ++i) {
512: _printer.printSpace();
513: name = attrs.getName(i).toLowerCase(Locale.ENGLISH);
514: value = attrs.getValue(i);
515: if (_xhtml) {
516: // XHTML: print empty string for null values.
517: if (value == null) {
518: _printer.printText(name);
519: _printer.printText("=\"\"");
520: } else {
521: _printer.printText(name);
522: _printer.printText("=\"");
523: printEscaped(value);
524: _printer.printText('"');
525: }
526: } else {
527: // HTML: Empty values print as attribute name, no value.
528: // HTML: URI attributes will print unescaped
529: if (value == null) {
530: value = "";
531: }
532: if (!_format.getPreserveEmptyAttributes()
533: && value.length() == 0)
534: _printer.printText(name);
535: else if (HTMLdtd.isURI(tagName, name)) {
536: _printer.printText(name);
537: _printer.printText("=\"");
538: _printer.printText(escapeURI(value));
539: _printer.printText('"');
540: } else if (HTMLdtd.isBoolean(tagName, name))
541: _printer.printText(name);
542: else {
543: _printer.printText(name);
544: _printer.printText("=\"");
545: printEscaped(value);
546: _printer.printText('"');
547: }
548: }
549: }
550: }
551: if (HTMLdtd.isPreserveSpace(tagName))
552: preserveSpace = true;
553:
554: // Now it's time to enter a new element state
555: // with the tag name and space preserving.
556: // We still do not change the curent element state.
557: state = enterElementState(null, null, tagName,
558: preserveSpace);
559:
560: // Prevents line breaks inside A/TD
561: if (tagName.equalsIgnoreCase("A")
562: || tagName.equalsIgnoreCase("TD")) {
563: state.empty = false;
564: _printer.printText('>');
565: }
566:
567: // Handle SCRIPT and STYLE specifically by changing the
568: // state of the current element to CDATA (XHTML) or
569: // unescaped (HTML).
570: if (tagName.equalsIgnoreCase("SCRIPT")
571: || tagName.equalsIgnoreCase("STYLE")) {
572: if (_xhtml) {
573: // XHTML: Print contents as CDATA section
574: state.doCData = true;
575: } else {
576: // HTML: Print contents unescaped
577: state.unescaped = true;
578: }
579: }
580: } catch (IOException except) {
581: throw new SAXException(except);
582: }
583: }
584:
585: public void endElement(String tagName) throws SAXException {
586: endElement(null, null, tagName);
587: }
588:
589: //------------------------------------------//
590: // Generic node serializing methods methods //
591: //------------------------------------------//
592:
593: /**
594: * Called to serialize the document's DOCTYPE by the root element.
595: * The document type declaration must name the root element,
596: * but the root element is only known when that element is serialized,
597: * and not at the start of the document.
598: * <p>
599: * This method will check if it has not been called before ({@link #_started}),
600: * will serialize the document type declaration, and will serialize all
601: * pre-root comments and PIs that were accumulated in the document
602: * (see {@link #serializePreRoot}). Pre-root will be serialized even if
603: * this is not the first root element of the document.
604: */
605: protected void startDocument(String rootTagName) throws IOException {
606: // Not supported in HTML/XHTML, but we still have to switch
607: // out of DTD mode.
608: _printer.leaveDTD();
609: if (!_started) {
610: // If the public and system identifiers were not specified
611: // in the output format, use the appropriate ones for HTML
612: // or XHTML.
613: if (_docTypePublicId == null && _docTypeSystemId == null) {
614: if (_xhtml) {
615: _docTypePublicId = HTMLdtd.XHTMLPublicId;
616: _docTypeSystemId = HTMLdtd.XHTMLSystemId;
617: } else {
618: _docTypePublicId = HTMLdtd.HTMLPublicId;
619: _docTypeSystemId = HTMLdtd.HTMLSystemId;
620: }
621: }
622:
623: if (!_format.getOmitDocumentType()) {
624: // XHTML: If public identifier and system identifier
625: // specified, print them, else print just system identifier
626: // HTML: If public identifier specified, print it with
627: // system identifier, if specified.
628: // XHTML requires that all element names are lower case, so the
629: // root on the DOCTYPE must be 'html'. - mrglavas
630: if (_docTypePublicId != null
631: && (!_xhtml || _docTypeSystemId != null)) {
632: if (_xhtml) {
633: _printer.printText("<!DOCTYPE html PUBLIC ");
634: } else {
635: _printer.printText("<!DOCTYPE HTML PUBLIC ");
636: }
637: printDoctypeURL(_docTypePublicId);
638: if (_docTypeSystemId != null) {
639: if (_indenting) {
640: _printer.breakLine();
641: _printer
642: .printText(" ");
643: } else
644: _printer.printText(' ');
645: printDoctypeURL(_docTypeSystemId);
646: }
647: _printer.printText('>');
648: _printer.breakLine();
649: } else if (_docTypeSystemId != null) {
650: if (_xhtml) {
651: _printer.printText("<!DOCTYPE html SYSTEM ");
652: } else {
653: _printer.printText("<!DOCTYPE HTML SYSTEM ");
654: }
655: printDoctypeURL(_docTypeSystemId);
656: _printer.printText('>');
657: _printer.breakLine();
658: }
659: }
660: }
661:
662: _started = true;
663: // Always serialize these, even if not te first root element.
664: serializePreRoot();
665: }
666:
667: /**
668: * Called to serialize a DOM element. Equivalent to calling {@link
669: * #startElement}, {@link #endElement} and serializing everything
670: * inbetween, but better optimized.
671: */
672: protected void serializeElement(Element elem) throws IOException {
673: Attr attr;
674: NamedNodeMap attrMap;
675: int i;
676: Node child;
677: ElementState state;
678: boolean preserveSpace;
679: String name;
680: String value;
681: String tagName;
682:
683: tagName = elem.getTagName();
684: state = getElementState();
685: if (isDocumentState()) {
686: // If this is the root element handle it differently.
687: // If the first root element in the document, serialize
688: // the document's DOCTYPE. Space preserving defaults
689: // to that of the output format.
690: if (!_started)
691: startDocument(tagName);
692: } else {
693: // For any other element, if first in parent, then
694: // close parent's opening tag and use the parnet's
695: // space preserving.
696: if (state.empty)
697: _printer.printText('>');
698: // Indent this element on a new line if the first
699: // content of the parent element or immediately
700: // following an element.
701: if (_indenting && !state.preserveSpace
702: && (state.empty || state.afterElement))
703: _printer.breakLine();
704: }
705: preserveSpace = state.preserveSpace;
706:
707: // Do not change the current element state yet.
708: // This only happens in endElement().
709:
710: // XHTML: element names are lower case, DOM will be different
711: _printer.printText('<');
712: if (_xhtml)
713: _printer.printText(tagName.toLowerCase(Locale.ENGLISH));
714: else
715: _printer.printText(tagName);
716: _printer.indent();
717:
718: // Lookup the element's attribute, but only print specified
719: // attributes. (Unspecified attributes are derived from the DTD.
720: // For each attribute print it's name and value as one part,
721: // separated with a space so the element can be broken on
722: // multiple lines.
723: attrMap = elem.getAttributes();
724: if (attrMap != null) {
725: for (i = 0; i < attrMap.getLength(); ++i) {
726: attr = (Attr) attrMap.item(i);
727: name = attr.getName().toLowerCase(Locale.ENGLISH);
728: value = attr.getValue();
729: if (attr.getSpecified()) {
730: _printer.printSpace();
731: if (_xhtml) {
732: // XHTML: print empty string for null values.
733: if (value == null) {
734: _printer.printText(name);
735: _printer.printText("=\"\"");
736: } else {
737: _printer.printText(name);
738: _printer.printText("=\"");
739: printEscaped(value);
740: _printer.printText('"');
741: }
742: } else {
743: // HTML: Empty values print as attribute name, no value.
744: // HTML: URI attributes will print unescaped
745: if (value == null) {
746: value = "";
747: }
748: if (!_format.getPreserveEmptyAttributes()
749: && value.length() == 0)
750: _printer.printText(name);
751: else if (HTMLdtd.isURI(tagName, name)) {
752: _printer.printText(name);
753: _printer.printText("=\"");
754: _printer.printText(escapeURI(value));
755: _printer.printText('"');
756: } else if (HTMLdtd.isBoolean(tagName, name))
757: _printer.printText(name);
758: else {
759: _printer.printText(name);
760: _printer.printText("=\"");
761: printEscaped(value);
762: _printer.printText('"');
763: }
764: }
765: }
766: }
767: }
768: if (HTMLdtd.isPreserveSpace(tagName))
769: preserveSpace = true;
770:
771: // If element has children, or if element is not an empty tag,
772: // serialize an opening tag.
773: if (elem.hasChildNodes() || !HTMLdtd.isEmptyTag(tagName)) {
774: // Enter an element state, and serialize the children
775: // one by one. Finally, end the element.
776: state = enterElementState(null, null, tagName,
777: preserveSpace);
778:
779: // Prevents line breaks inside A/TD
780: if (tagName.equalsIgnoreCase("A")
781: || tagName.equalsIgnoreCase("TD")) {
782: state.empty = false;
783: _printer.printText('>');
784: }
785:
786: // Handle SCRIPT and STYLE specifically by changing the
787: // state of the current element to CDATA (XHTML) or
788: // unescaped (HTML).
789: if (tagName.equalsIgnoreCase("SCRIPT")
790: || tagName.equalsIgnoreCase("STYLE")) {
791: if (_xhtml) {
792: // XHTML: Print contents as CDATA section
793: state.doCData = true;
794: } else {
795: // HTML: Print contents unescaped
796: state.unescaped = true;
797: }
798: }
799: child = elem.getFirstChild();
800: while (child != null) {
801: serializeNode(child);
802: child = child.getNextSibling();
803: }
804: endElementIO(null, null, tagName);
805: } else {
806: _printer.unindent();
807: // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
808: // HTML: Empty tags are defined as such in DTD no in document.
809: if (_xhtml)
810: _printer.printText(" />");
811: else
812: _printer.printText('>');
813: // After element but parent element is no longer empty.
814: state.afterElement = true;
815: state.empty = false;
816: if (isDocumentState())
817: _printer.flush();
818: }
819: }
820:
821: protected void characters(String text) throws IOException {
822: // HTML: no CDATA section
823: content();
824: super .characters(text);
825: }
826:
827: protected String getEntityRef(int ch) {
828: return HTMLdtd.fromChar(ch);
829: }
830:
831: protected String escapeURI(String uri) {
832: int index;
833:
834: // XXX Apparently Netscape doesn't like if we escape the URI
835: // using %nn, so we leave it as is, just remove any quotes.
836: index = uri.indexOf("\"");
837: if (index >= 0) {
838: return uri.substring(0, index);
839: }
840: return uri;
841: }
842:
843: }
|