001: /*
002: * Copyright 1999-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: // Sep 14, 2000:
018: // Fixed serializer to report IO exception directly, instead at
019: // the end of document processing.
020: // Reported by Patrick Higgins <phiggins@transzap.com>
021: // Aug 21, 2000:
022: // Fixed bug in startDocument not calling prepare.
023: // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
024: // Aug 21, 2000:
025: // Added ability to omit DOCTYPE declaration.
026: // Sep 1, 2000:
027: // If no output format is provided the serializer now defaults
028: // to ISO-8859-1 encoding. Reported by Mikael Staldal
029: // <d96-mst@d.kth.se>
030:
031: package org.jasig.portal.serialize;
032:
033: import org.apache.xerces.dom.DOMMessageFormatter;
034:
035: import java.io.IOException;
036: import java.io.OutputStream;
037: import java.io.Writer;
038: import java.util.Enumeration;
039: import java.util.Locale;
040:
041: import org.jasig.portal.IAnchoringSerializer;
042: import org.w3c.dom.Attr;
043: import org.w3c.dom.Element;
044: import org.w3c.dom.NamedNodeMap;
045: import org.w3c.dom.Node;
046: import org.xml.sax.AttributeList;
047: import org.xml.sax.Attributes;
048: import org.xml.sax.SAXException;
049:
050: /**
051: * Implements an HTML/XHTML serializer supporting both DOM and SAX
052: * pretty serializing. HTML/XHTML mode is determined in the
053: * constructor. For usage instructions see {@link Serializer}.
054: * <p>
 * If an output stream is used, the encoding is taken from the
 * output format (defaults to <tt>UTF-8</tt>; the output format this
 * serializer creates when none is supplied uses <tt>ISO-8859-1</tt>).
 * If a writer is used, make sure the writer uses the same encoding
 * (if applicable) as specified in the output format.
059: * <p>
060: * The serializer supports both DOM and SAX. DOM serializing is done
061: * by calling {@link #serialize} and SAX serializing is done by firing
062: * SAX events and using the serializer as a document handler.
063: * <p>
 * If an I/O exception occurs while serializing, it is reported
 * immediately rather than at the end of the document: the SAX
 * handler methods rethrow it wrapped in a
 * {@link org.xml.sax.SAXException}, and the DOM serializing
 * methods throw the {@link java.io.IOException} itself.
068: * <p>
069: * For elements that are not specified as whitespace preserving,
070: * the serializer will potentially break long text lines at space
071: * boundaries, indent lines, and serialize elements on separate
072: * lines. Line terminators will be regarded as spaces, and
073: * spaces at beginning of line will be stripped.
074: * <p>
075: * XHTML is slightly different than HTML:
076: * <ul>
077: * <li>Element/attribute names are lower case and case matters
078: * <li>Attributes must specify value, even if empty string
079: * <li>Empty elements must have '/' in empty tag
080: * <li>Contents of SCRIPT and STYLE elements serialized as CDATA
081: * </ul>
082: *
083: * @deprecated This class was deprecated in Xerces 2.6.2. It is
084: * recommended that new applications use JAXP's Transformation API
085: * for XML (TrAX) for serializing HTML. See the Xerces documentation
086: * for more information.
087: * @version $Revision: 42283 $ $Date: 2007-08-04 18:44:27 -0700 (Sat, 04 Aug 2007) $
088: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
089: * @see Serializer
090: */
091: public class HTMLSerializer extends BaseMarkupSerializer implements
092: IAnchoringSerializer {
093:
/**
 * True if serializing in XHTML format.
 */
private boolean _xhtml;

/**
 * Namespace URI that the SAX2 element handlers compare an element's
 * namespace against to decide whether it is an XHTML element.
 */
public static final String XHTMLNamespace = "http://www.w3.org/1999/xhtml";

// Optional alternate XHTML namespace supplied by the user; elements in
// this namespace are treated the same as elements in XHTMLNamespace.
// Null when no alternate namespace has been set.
private String fUserXHTMLNamespace = null;

// We're using these instead of BaseMarkupSerializer's _docTypePublicId
// because it does funky stuff, changing its value throughout the parsing
// of a document. This will enable us to set the DOCTYPE for any document.
// Both values are copied from the output format by the constructor.
private String docTypePublicId = null;
private String docTypeSystemId = null;
109:
/**
 * Constructs a new HTML/XHTML serializer depending on the value of
 * <tt>xhtml</tt>. The serializer cannot be used without calling
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} first.
 *
 * @param xhtml True if XHTML serializing
 * @param format The output format handed to the base serializer; when
 *   it is not null, its DOCTYPE public and system identifiers are
 *   remembered for the document type declaration
 */
protected HTMLSerializer(boolean xhtml, OutputFormat format) {
    super(format);
    _xhtml = xhtml;
    if (format != null) {
        docTypePublicId = format.getDoctypePublic();
        docTypeSystemId = format.getDoctypeSystem();
    }
}

/**
 * Constructs a new HTML serializer using the default output format
 * (HTML method, ISO-8859-1 encoding). The serializer cannot be used
 * without calling {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} first.
 */
public HTMLSerializer() {
    this(false, formatOrDefault(null));
}

/**
 * Constructs a new HTML serializer. The serializer cannot be used
 * without calling {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} first.
 *
 * @param format The output format to use, null for the default
 */
public HTMLSerializer(OutputFormat format) {
    this(false, formatOrDefault(format));
}

/**
 * Constructs a new serializer that writes to the specified writer
 * using the specified output format. If <tt>format</tt> is null,
 * will use a default output format.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 */
public HTMLSerializer(Writer writer, OutputFormat format) {
    this(false, formatOrDefault(format));
    setOutputCharStream(writer);
}

/**
 * Constructs a new serializer that writes to the specified output
 * stream using the specified output format. If <tt>format</tt>
 * is null, will use a default output format.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 */
public HTMLSerializer(OutputStream output, OutputFormat format) {
    this(false, formatOrDefault(format));
    setOutputByteStream(output);
}

/**
 * Sets the output format, substituting the default HTML output format
 * when <tt>format</tt> is null.
 *
 * @param format The output format to use, null for the default
 */
public void setOutputFormat(OutputFormat format) {
    super.setOutputFormat(formatOrDefault(format));
}

/**
 * Returns <tt>format</tt> unchanged when it is not null; otherwise
 * creates the default output format (HTML method, ISO-8859-1). Keeps
 * the fallback in one place instead of repeating it in every
 * constructor and in {@link #setOutputFormat}.
 */
private static OutputFormat formatOrDefault(OutputFormat format) {
    if (format != null) {
        return format;
    }
    return new OutputFormat(Method.HTML, "ISO-8859-1", false);
}
177:
178: // Set value for alternate XHTML namespace.
179: public void setXHTMLNamespace(String newNamespace) {
180: fUserXHTMLNamespace = newNamespace;
181: } // setXHTMLNamespace(String)
182:
183: //-----------------------------------------//
184: // SAX content handler serializing methods //
185: //-----------------------------------------//
186:
187: public void startElement(String namespaceURI, String localName,
188: String rawName, Attributes attrs) throws SAXException {
189: int i;
190: boolean preserveSpace;
191: ElementState state;
192: String name;
193: String value;
194: String htmlName;
195: boolean addNSAttr = false;
196:
197: try {
198: if (_printer == null)
199: throw new IllegalStateException(DOMMessageFormatter
200: .formatMessage(
201: DOMMessageFormatter.SERIALIZER_DOMAIN,
202: "NoWriterSupplied", null));
203:
204: state = getElementState();
205: if (isDocumentState()) {
206: // If this is the root element handle it differently.
207: // If the first root element in the document, serialize
208: // the document's DOCTYPE. Space preserving defaults
209: // to that of the output format.
210: if (!_started)
211: startDocument((localName == null || localName
212: .length() == 0) ? rawName : localName);
213: } else {
214: // For any other element, if first in parent, then
215: // close parent's opening tag and use the parnet's
216: // space preserving.
217: if (state.empty)
218: _printer.printText('>');
219: // Indent this element on a new line if the first
220: // content of the parent element or immediately
221: // following an element.
222: if (_indenting && !state.preserveSpace
223: && (state.empty || state.afterElement))
224: _printer.breakLine();
225: }
226: preserveSpace = state.preserveSpace;
227:
228: // Do not change the current element state yet.
229: // This only happens in endElement().
230:
231: // As per SAX2, the namespace URI is an empty string if the element has no
232: // namespace URI, or namespaces is turned off. The check against null protects
233: // against broken SAX implementations, so I've left it there. - mrglavas
234: boolean hasNamespaceURI = (namespaceURI != null && namespaceURI
235: .length() != 0);
236:
237: // SAX2: rawName (QName) could be empty string if
238: // namespace-prefixes property is false.
239: if (rawName == null || rawName.length() == 0) {
240: rawName = localName;
241: if (hasNamespaceURI) {
242: String prefix;
243: prefix = getPrefix(namespaceURI);
244: if (prefix != null && prefix.length() != 0)
245: rawName = prefix + ":" + localName;
246: }
247: addNSAttr = true;
248: }
249: if (!hasNamespaceURI)
250: htmlName = rawName;
251: else {
252: if (namespaceURI.equals(XHTMLNamespace)
253: || (fUserXHTMLNamespace != null && fUserXHTMLNamespace
254: .equals(namespaceURI)))
255: htmlName = localName;
256: else
257: htmlName = null;
258: }
259:
260: // XHTML: element names are lower case, DOM will be different
261: _printer.printText('<');
262: if (_xhtml)
263: _printer.printText(rawName.toLowerCase(Locale.ENGLISH));
264: else
265: _printer.printText(rawName);
266: _printer.indent();
267:
268: // For each attribute serialize it's name and value as one part,
269: // separated with a space so the element can be broken on
270: // multiple lines.
271: if (attrs != null) {
272: for (i = 0; i < attrs.getLength(); ++i) {
273: _printer.printSpace();
274: name = attrs.getQName(i)
275: .toLowerCase(Locale.ENGLISH);
276: value = attrs.getValue(i);
277: if (_xhtml || hasNamespaceURI) {
278: // XHTML: print empty string for null values.
279: if (value == null) {
280: _printer.printText(name);
281: _printer.printText("=\"\"");
282: } else {
283: _printer.printText(name);
284: _printer.printText("=\"");
285: value = ProxyWriter.considerProxyRewrite(
286: name, localName, value);
287: value = appendAnchorIfNecessary(rawName
288: .toLowerCase(), name, value);
289: printEscaped(value);
290: _printer.printText('"');
291: }
292: } else {
293: // HTML: Empty values print as attribute name, no value.
294: // HTML: URI attributes will print unescaped
295: if (value == null) {
296: value = "";
297: }
298: if (!_format.getPreserveEmptyAttributes()
299: && value.length() == 0)
300: _printer.printText(name);
301: else if (HTMLdtd.isURI(rawName, name)) {
302: _printer.printText(name);
303: _printer.printText("=\"");
304: value = ProxyWriter.considerProxyRewrite(
305: name, localName, value);
306: _printer.printText(escapeURI(value));
307: _printer.printText('"');
308: } else if (HTMLdtd.isBoolean(rawName, name))
309: _printer.printText(name);
310: else {
311: _printer.printText(name);
312: _printer.printText("=\"");
313: printEscaped(value);
314: _printer.printText('"');
315: }
316: }
317: }
318: }
319: if (htmlName != null && HTMLdtd.isPreserveSpace(htmlName))
320: preserveSpace = true;
321:
322: if (addNSAttr) {
323: Enumeration keys;
324:
325: keys = _prefixes.keys();
326: while (keys.hasMoreElements()) {
327: _printer.printSpace();
328: value = (String) keys.nextElement();
329: name = (String) _prefixes.get(value);
330: if (name.length() == 0) {
331: _printer.printText("xmlns=\"");
332: printEscaped(value);
333: _printer.printText('"');
334: } else {
335: _printer.printText("xmlns:");
336: _printer.printText(name);
337: _printer.printText("=\"");
338: printEscaped(value);
339: _printer.printText('"');
340: }
341: }
342: }
343:
344: // Now it's time to enter a new element state
345: // with the tag name and space preserving.
346: // We still do not change the curent element state.
347: state = enterElementState(namespaceURI, localName, rawName,
348: preserveSpace);
349:
350: // Prevents line breaks inside A/TD
351:
352: if (htmlName != null
353: && (htmlName.equalsIgnoreCase("A") || htmlName
354: .equalsIgnoreCase("TD"))) {
355: state.empty = false;
356: _printer.printText('>');
357: }
358:
359: // Handle SCRIPT and STYLE specifically by changing the
360: // state of the current element to CDATA (XHTML) or
361: // unescaped (HTML).
362: if (htmlName != null
363: && (rawName.equalsIgnoreCase("SCRIPT") || rawName
364: .equalsIgnoreCase("STYLE"))) {
365: if (_xhtml) {
366: // XHTML: Print contents as CDATA section
367: state.doCData = true;
368: if (rawName.equalsIgnoreCase("SCRIPT")) {
369: state.inScript = true;
370: } else {
371: state.inScript = false;
372: }
373:
374: } else {
375: // HTML: Print contents unescaped
376: state.unescaped = true;
377: }
378: }
379: } catch (IOException except) {
380: throw new SAXException(except);
381: }
382: }
383:
384: public void endElement(String namespaceURI, String localName,
385: String rawName) throws SAXException {
386: try {
387: endElementIO(namespaceURI, localName, rawName);
388: } catch (IOException except) {
389: throw new SAXException(except);
390: }
391: }
392:
393: public void endElementIO(String namespaceURI, String localName,
394: String rawName) throws IOException {
395: ElementState state;
396: String htmlName;
397:
398: // Works much like content() with additions for closing
399: // an element. Note the different checks for the closed
400: // element's state and the parent element's state.
401: _printer.unindent();
402: state = getElementState();
403:
404: if (state.namespaceURI == null
405: || state.namespaceURI.length() == 0)
406: htmlName = state.rawName;
407: else {
408: if (state.namespaceURI.equals(XHTMLNamespace)
409: || (fUserXHTMLNamespace != null && fUserXHTMLNamespace
410: .equals(state.namespaceURI)))
411: htmlName = state.localName;
412: else
413: htmlName = null;
414: }
415:
416: if (_xhtml) {
417: if (state.empty) {
418: // Close all empty tags that require proper closer
419: if (!shouldNotExpandEndTagForEmptyElement(state.rawName
420: .toLowerCase())) {
421: _printer.printText("></");
422: _printer.printText(state.rawName
423: .toLowerCase(Locale.ENGLISH));
424: _printer.printText('>');
425: } else {
426: _printer.printText(" />");
427: }
428: } else {
429: // Must leave CData section first
430: if (state.inCData) {
431: if (html4compat && state.inScript) {
432: _printer.printText("\n//]]>");
433: } else {
434: // _printer.printText( "\n/*]]>*/-->" );
435: _printer.printText("\n/*]]>*/");
436: // _printer.printText( "]]>" );
437: }
438: }
439: // XHTML: element names are lower case, DOM will be different
440: _printer.printText("</");
441: _printer.printText(state.rawName
442: .toLowerCase(Locale.ENGLISH));
443: _printer.printText('>');
444: }
445: } else {
446: if (state.empty)
447: _printer.printText('>');
448: // This element is not empty and that last content was
449: // another element, so print a line break before that
450: // last element and this element's closing tag.
451: // [keith] Provided this is not an anchor.
452: // HTML: some elements do not print closing tag (e.g. LI)
453: if (htmlName == null || !HTMLdtd.isOnlyOpening(htmlName)) {
454: if (_indenting && !state.preserveSpace
455: && state.afterElement)
456: _printer.breakLine();
457: // Must leave CData section first (Illegal in HTML, but still)
458: if (state.inCData)
459: _printer.printText("]]>");
460: _printer.printText("</");
461: _printer.printText(state.rawName);
462: _printer.printText('>');
463: }
464: }
465: // Leave the element state and update that of the parent
466: // (if we're not root) to not empty and after element.
467: state = leaveElementState();
468: // Temporary hack to prevent line breaks inside A/TD
469: if (htmlName == null
470: || (!htmlName.equalsIgnoreCase("A") && !htmlName
471: .equalsIgnoreCase("TD")))
472:
473: state.afterElement = true;
474: state.empty = false;
475: if (isDocumentState())
476: _printer.flush();
477: }
478:
479: //------------------------------------------//
480: // SAX document handler serializing methods //
481: //------------------------------------------//
482:
483: public void characters(char[] chars, int start, int length)
484: throws SAXException {
485: ElementState state;
486:
487: try {
488: // HTML: no CDATA section
489: state = content();
490: if (!_xhtml) {
491: state.doCData = false;
492: }
493: super .characters(chars, start, length);
494: } catch (IOException except) {
495: throw new SAXException(except);
496: }
497: }
498:
499: public void startElement(String tagName, AttributeList attrs)
500: throws SAXException {
501: int i;
502: boolean preserveSpace;
503: ElementState state;
504: String name;
505: String value;
506:
507: try {
508: if (_printer == null)
509: throw new IllegalStateException(DOMMessageFormatter
510: .formatMessage(
511: DOMMessageFormatter.SERIALIZER_DOMAIN,
512: "NoWriterSupplied", null));
513:
514: state = getElementState();
515: if (isDocumentState()) {
516: // If this is the root element handle it differently.
517: // If the first root element in the document, serialize
518: // the document's DOCTYPE. Space preserving defaults
519: // to that of the output format.
520: if (!_started)
521: startDocument(tagName);
522: } else {
523: // For any other element, if first in parent, then
524: // close parent's opening tag and use the parnet's
525: // space preserving.
526: if (state.empty)
527: _printer.printText('>');
528: // Indent this element on a new line if the first
529: // content of the parent element or immediately
530: // following an element.
531: if (_indenting && !state.preserveSpace
532: && (state.empty || state.afterElement))
533: _printer.breakLine();
534: }
535: preserveSpace = state.preserveSpace;
536:
537: // Do not change the current element state yet.
538: // This only happens in endElement().
539:
540: // XHTML: element names are lower case, DOM will be different
541: _printer.printText('<');
542: if (_xhtml)
543: _printer.printText(tagName.toLowerCase(Locale.ENGLISH));
544: else
545: _printer.printText(tagName);
546: _printer.indent();
547:
548: // For each attribute serialize it's name and value as one part,
549: // separated with a space so the element can be broken on
550: // multiple lines.
551: if (attrs != null) {
552: for (i = 0; i < attrs.getLength(); ++i) {
553: _printer.printSpace();
554: name = attrs.getName(i).toLowerCase(Locale.ENGLISH);
555: value = attrs.getValue(i);
556: if (_xhtml) {
557: // XHTML: print empty string for null values.
558: if (value == null) {
559: _printer.printText(name);
560: _printer.printText("=\"\"");
561: } else {
562: _printer.printText(name);
563: _printer.printText("=\"");
564: printEscaped(value);
565: _printer.printText('"');
566: }
567: } else {
568: // HTML: Empty values print as attribute name, no value.
569: // HTML: URI attributes will print unescaped
570: if (value == null) {
571: value = "";
572: }
573: if (!_format.getPreserveEmptyAttributes()
574: && value.length() == 0)
575: _printer.printText(name);
576: else if (HTMLdtd.isURI(tagName, name)) {
577: _printer.printText(name);
578: _printer.printText("=\"");
579: _printer.printText(escapeURI(value));
580: _printer.printText('"');
581: } else if (HTMLdtd.isBoolean(tagName, name))
582: _printer.printText(name);
583: else {
584: _printer.printText(name);
585: _printer.printText("=\"");
586: printEscaped(value);
587: _printer.printText('"');
588: }
589: }
590: }
591: }
592: if (HTMLdtd.isPreserveSpace(tagName))
593: preserveSpace = true;
594:
595: // Now it's time to enter a new element state
596: // with the tag name and space preserving.
597: // We still do not change the curent element state.
598: state = enterElementState(null, null, tagName,
599: preserveSpace);
600:
601: // Prevents line breaks inside A/TD
602: if (tagName.equalsIgnoreCase("A")
603: || tagName.equalsIgnoreCase("TD")) {
604: state.empty = false;
605: _printer.printText('>');
606: } else {
607: if (_xhtml
608: && shouldNotExpandEndTagForEmptyElement(tagName
609: .toLowerCase())) {
610: _printer.printText(" />");
611: } else {
612: _printer.printText(">");
613: }
614: }
615:
616: // Handle SCRIPT and STYLE specifically by changing the
617: // state of the current element to CDATA (XHTML) or
618: // unescaped (HTML).
619: if (tagName.equalsIgnoreCase("SCRIPT")
620: || tagName.equalsIgnoreCase("STYLE")) {
621: if (_xhtml) {
622: // XHTML: Print contents as CDATA section
623: state.doCData = true;
624: if (tagName.equalsIgnoreCase("SCRIPT")) {
625: state.inScript = true;
626: } else {
627: state.inScript = false;
628: }
629: } else {
630: // HTML: Print contents unescaped
631: state.unescaped = true;
632: }
633: }
634: } catch (IOException except) {
635: throw new SAXException(except);
636: }
637: }
638:
639: public void endElement(String tagName) throws SAXException {
640: endElement(null, null, tagName);
641: }
642:
643: //------------------------------------------//
644: // Generic node serializing methods methods //
645: //------------------------------------------//
646:
647: /**
648: * Called to serialize the document's DOCTYPE by the root element.
649: * The document type declaration must name the root element,
650: * but the root element is only known when that element is serialized,
651: * and not at the start of the document.
652: * <p>
653: * This method will check if it has not been called before ({@link #_started}),
654: * will serialize the document type declaration, and will serialize all
655: * pre-root comments and PIs that were accumulated in the document
656: * (see {@link #serializePreRoot}). Pre-root will be serialized even if
657: * this is not the first root element of the document.
658: */
659: protected void startDocument(String rootTagName) throws IOException {
660: StringBuffer buffer;
661:
662: // Not supported in HTML/XHTML, but we still have to switch
663: // out of DTD mode.
664: _printer.leaveDTD();
665: if (!_started) {
666: // If the public and system identifiers were not specified
667: // in the output format, use the appropriate ones for HTML
668: // or XHTML.
669: if (docTypePublicId == null && _docTypeSystemId == null) {
670: if (_xhtml) {
671: docTypePublicId = HTMLdtd.XHTMLPublicId;
672: _docTypeSystemId = HTMLdtd.XHTMLSystemId;
673: } else {
674: docTypePublicId = HTMLdtd.HTMLPublicId;
675: _docTypeSystemId = HTMLdtd.HTMLSystemId;
676: }
677: }
678:
679: if (!_format.getOmitDocumentType()) {
680: // XHTML: If public identifier and system identifier
681: // specified, print them, else print just system identifier
682: // HTML: If public identifier specified, print it with
683: // system identifier, if specified.
684: // XHTML requires that all element names are lower case, so the
685: // root on the DOCTYPE must be 'html'. - mrglavas
686: if (docTypePublicId != null
687: && (!_xhtml || _docTypeSystemId != null)) {
688: if (_xhtml) {
689: _printer.printText("<!DOCTYPE html PUBLIC ");
690: } else {
691: _printer.printText("<!DOCTYPE HTML PUBLIC ");
692: }
693: printDoctypeURL(docTypePublicId);
694: if (docTypeSystemId != null) {
695: if (_indenting) {
696: _printer.breakLine();
697: _printer
698: .printText(" ");
699: } else
700: _printer.printText(' ');
701: printDoctypeURL(docTypeSystemId);
702: }
703: _printer.printText('>');
704: _printer.breakLine();
705: } else if (docTypeSystemId != null) {
706: if (_xhtml) {
707: _printer.printText("<!DOCTYPE html SYSTEM ");
708: } else {
709: _printer.printText("<!DOCTYPE HTML SYSTEM ");
710: }
711: printDoctypeURL(docTypeSystemId);
712: _printer.printText('>');
713: _printer.breakLine();
714: }
715: }
716: }
717:
718: _started = true;
719: // Always serialize these, even if not te first root element.
720: serializePreRoot();
721: }
722:
723: /**
724: * Called to serialize a DOM element. Equivalent to calling {@link
725: * #startElement}, {@link #endElement} and serializing everything
726: * inbetween, but better optimized.
727: */
728: protected void serializeElement(Element elem) throws IOException {
729: Attr attr;
730: NamedNodeMap attrMap;
731: int i;
732: Node child;
733: ElementState state;
734: boolean preserveSpace;
735: String name;
736: String value;
737: String tagName;
738:
739: tagName = elem.getTagName();
740: state = getElementState();
741: if (isDocumentState()) {
742: // If this is the root element handle it differently.
743: // If the first root element in the document, serialize
744: // the document's DOCTYPE. Space preserving defaults
745: // to that of the output format.
746: if (!_started)
747: startDocument(tagName);
748: } else {
749: // For any other element, if first in parent, then
750: // close parent's opening tag and use the parnet's
751: // space preserving.
752: if (state.empty)
753: _printer.printText('>');
754: // Indent this element on a new line if the first
755: // content of the parent element or immediately
756: // following an element.
757: if (_indenting && !state.preserveSpace
758: && (state.empty || state.afterElement))
759: _printer.breakLine();
760: }
761: preserveSpace = state.preserveSpace;
762:
763: // Do not change the current element state yet.
764: // This only happens in endElement().
765:
766: // XHTML: element names are lower case, DOM will be different
767: _printer.printText('<');
768: if (_xhtml)
769: _printer.printText(tagName.toLowerCase(Locale.ENGLISH));
770: else
771: _printer.printText(tagName);
772: _printer.indent();
773:
774: // Lookup the element's attribute, but only print specified
775: // attributes. (Unspecified attributes are derived from the DTD.
776: // For each attribute print it's name and value as one part,
777: // separated with a space so the element can be broken on
778: // multiple lines.
779: attrMap = elem.getAttributes();
780: if (attrMap != null) {
781: for (i = 0; i < attrMap.getLength(); ++i) {
782: attr = (Attr) attrMap.item(i);
783: name = attr.getName().toLowerCase(Locale.ENGLISH);
784: value = attr.getValue();
785: if (attr.getSpecified()) {
786: _printer.printSpace();
787: if (_xhtml) {
788: // XHTML: print empty string for null values.
789: if (value == null) {
790: _printer.printText(name);
791: _printer.printText("=\"\"");
792: } else {
793: _printer.printText(name);
794: _printer.printText("=\"");
795: printEscaped(value);
796: _printer.printText('"');
797: }
798: } else {
799: // HTML: Empty values print as attribute name, no value.
800: // HTML: URI attributes will print unescaped
801: if (value == null) {
802: value = "";
803: }
804: if (!_format.getPreserveEmptyAttributes()
805: && value.length() == 0)
806: _printer.printText(name);
807: else if (HTMLdtd.isURI(tagName, name)) {
808: _printer.printText(name);
809: _printer.printText("=\"");
810: _printer.printText(escapeURI(value));
811: _printer.printText('"');
812: } else if (HTMLdtd.isBoolean(tagName, name))
813: _printer.printText(name);
814: else {
815: _printer.printText(name);
816: _printer.printText("=\"");
817: printEscaped(value);
818: _printer.printText('"');
819: }
820: }
821: }
822: }
823: }
824: if (HTMLdtd.isPreserveSpace(tagName))
825: preserveSpace = true;
826:
827: // If element has children, or if element is not an empty tag,
828: // serialize an opening tag.
829: if (elem.hasChildNodes() || !HTMLdtd.isEmptyTag(tagName)) {
830: // Enter an element state, and serialize the children
831: // one by one. Finally, end the element.
832: state = enterElementState(null, null, tagName,
833: preserveSpace);
834:
835: // Prevents line breaks inside A/TD
836: if (tagName.equalsIgnoreCase("A")
837: || tagName.equalsIgnoreCase("TD")) {
838: state.empty = false;
839: _printer.printText('>');
840: }
841:
842: // Handle SCRIPT and STYLE specifically by changing the
843: // state of the current element to CDATA (XHTML) or
844: // unescaped (HTML).
845: if (tagName.equalsIgnoreCase("SCRIPT")
846: || tagName.equalsIgnoreCase("STYLE")) {
847: if (_xhtml) {
848: // XHTML: Print contents as CDATA section
849: state.doCData = true;
850: if (tagName.equalsIgnoreCase("SCRIPT")) {
851: state.inScript = true;
852: } else {
853: state.inScript = false;
854: }
855: } else {
856: // HTML: Print contents unescaped
857: state.unescaped = true;
858: }
859: }
860: child = elem.getFirstChild();
861: while (child != null) {
862: serializeNode(child);
863: child = child.getNextSibling();
864: }
865: endElementIO(null, null, tagName);
866: } else {
867: _printer.unindent();
868: // XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
869: // HTML: Empty tags are defined as such in DTD no in document.
870: if (_xhtml)
871: _printer.printText(" />");
872: else
873: _printer.printText('>');
874: // After element but parent element is no longer empty.
875: state.afterElement = true;
876: state.empty = false;
877: if (isDocumentState())
878: _printer.flush();
879: }
880: }
881:
882: protected void characters(String text) throws IOException {
883: ElementState state;
884:
885: // HTML: no CDATA section
886: state = content();
887: super .characters(text);
888: }
889:
890: protected String getEntityRef(int ch) {
891: return HTMLdtd.fromChar(ch);
892: }
893:
894: protected String escapeURI(String uri) {
895: int index;
896:
897: // XXX Apparently Netscape doesn't like if we escape the URI
898: // using %nn, so we leave it as is, just remove any quotes.
899: index = uri.indexOf("\"");
900: if (index >= 0)
901: return uri.substring(0, index);
902: else
903: return uri;
904: }
905:
906: public void startAnchoring(String anchorId) {
907: this .anchorId = anchorId;
908: }
909:
910: public void stopAnchoring() {
911: this .anchorId = null;
912: }
913:
914: protected String appendAnchorIfNecessary(String elementName,
915: String attributeName, String attributeValue) {
916: if (anchorId != null) {
917: // looking for an <a> or <form> tag element
918: if (elementName.equalsIgnoreCase("a")
919: || elementName.equalsIgnoreCase("form")) {
920: // found an <a> or <form>, let's peek at the attributes it contains
921: // does it contain either an "href" or "action" attribute
922: if (attributeName.equalsIgnoreCase("href")
923: || attributeName.equalsIgnoreCase("action")) {
924: // found the attribute, now lets make sure it points back to a channel
925: // check for an existing anchor ..
926: if (attributeValue.indexOf(".render.") != -1
927: && attributeValue.indexOf("#") != -1) {
928: return attributeValue;
929: // check for the javascript keyword in the url
930: } else if (attributeValue.indexOf(".render.") != -1
931: && attributeValue.indexOf("javascript:") != -1) {
932: return attributeValue;
933: } else if (attributeValue.indexOf(".render.") != -1) {
934: // this link points back to a channel, so let's
935: // rewrite it and place back into the Attribute Object
936: attributeValue += "#" + anchorId;
937: }
938: }
939: }
940: }
941: return attributeValue;
942: }
943:
944: public boolean shouldNotExpandEndTagForEmptyElement(
945: String elementName) {
946: boolean aReturn = false;
947: for (int i = 0; !aReturn && i < emptyElementsToNotExpand.length; i++)
948: aReturn = emptyElementsToNotExpand[i].equals(elementName);
949: return aReturn;
950: }
951:
952: /** Array of element tag names that are not expanded when empty **/
953: private static final String[] emptyElementsToNotExpand = { "br",
954: "hr", "area", "base", "basefont", "col", "frame", "img",
955: "input", "isindex", "link", "meta", "param" };
956:
957: }
|