001: /*
002: * Enhydra Java Application Server Project
003: *
004: * The contents of this file are subject to the Enhydra Public License
005: * Version 1.1 (the "License"); you may not use this file except in
006: * compliance with the License. You may obtain a copy of the License on
007: * the Enhydra web site ( http://www.enhydra.org/ ).
008: *
009: * Software distributed under the License is distributed on an "AS IS"
010: * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
011: * the License for the specific terms governing rights and limitations
012: * under the License.
013: *
014: * The Initial Developer of the Enhydra Application Server is Lutris
015: * Technologies, Inc. The Enhydra Application Server and portions created
016: * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
017: * All Rights Reserved.
018: *
019: * Contributor(s):
020: *
021: * $Id: ExtHTMLFormatter.java,v 1.1 2007-10-19 10:35:48 sinisa Exp $
022: */
023:
024: package org.enhydra.xml.io;
025:
026: import java.io.BufferedWriter;
027: import java.io.ByteArrayOutputStream;
028: import java.io.IOException;
029: import java.io.OutputStream;
030: import java.io.OutputStreamWriter;
031: import java.io.Writer;
032:
033: import org.enhydra.xml.dom.DOMAccess;
034: import org.w3c.dom.Attr;
035: import org.w3c.dom.CDATASection;
036: import org.w3c.dom.Document;
037: import org.w3c.dom.DocumentFragment;
038: import org.w3c.dom.DocumentType;
039: import org.w3c.dom.Element;
040: import org.w3c.dom.Node;
041: import org.w3c.dom.ProcessingInstruction;
042: import org.w3c.dom.Text;
043:
044: /*
045: * FIXME:
046: * - Need to check for optional attributes.
047: * - Give control over newline in OutputOptions or maybe use a PrintWriter???
048: * - Need option to include HTML 4.0 entity references.
049: * - script and style child special handling.
050: * - Need to output HTML header.
051: */
052:
053: /**
054: * Formatter for outputting a HTML DOM as a HTML text document.
055: */
056: public class ExtHTMLFormatter extends ExtBaseDOMFormatter implements
057: org.enhydra.util.DOMFormatter {
058: /**
059: * Default XML encoding.
060: */
061: private static final String DEFAULT_XML_ENCODING = "ISO-8859-1";
062:
063: /**
064: * Table use to optimized checking for characters that should be
065: * represented as entity references.
066: */
067: private static final boolean[] fEntityQuickCheck = new boolean[MAX_ENTITY_QUICK_CHECK_CHAR + 1];
068:
069: /**
070: * Should SPAN ID attributes be dropped?
071: */
072: private final boolean fDropSpanIds;
073:
074: /**
075: * Flag that indicates the ID attribute should be dropped for the current
076: * element.
077: */
078: private boolean fDropThisId;
079:
080: /**
081: * Flag that indicates wether to use all named entites for all HTML 4.0 character
082: * entities or not.
083: */
084: private boolean fUseHTML4Entities;
085:
086: /**
087: * Nesting count for elements that don't have their content formatted.
088: * This is done for script and style elements. The contents of these
089: * elements are outputted as-is. Its not legal for them to nest, but a
090: * count is used to keep code from being confused by a broken DOM.
091: */
092: private int fNoFormatNestCount;
093:
094: /**
095: * Indicates then a text has just been handled
096: */
097: private boolean fHandleText = false;
098:
099: /**
100: * Indicates the next Sibling is a Text node
101: */
102: private boolean fNextSiblingText = false;
103:
104: /**
105: * Static constructor.
106: */
107: static {
108: for (char ch = 0; ch <= MAX_ENTITY_QUICK_CHECK_CHAR; ch++) {
109: fEntityQuickCheck[ch] = (HTMLEntities.charToEntity(ch) != null);
110: }
111: }
112:
/**
 * Create a formatter for the given node.
 *
 * @param node the node passed on to the base formatter.
 * @param outputOptions the output options passed on to the base formatter.
 * @param forPreFormatting flag passed on to the base formatter.
 */
public ExtHTMLFormatter(Node node, OutputOptions outputOptions,
        boolean forPreFormatting) {
    super(node, outputOptions, forPreFormatting,
            DEFAULT_XML_ENCODING, fEntityQuickCheck);
    // Both settings are read from fOptions once the base class is set up.
    fUseHTML4Entities = fOptions.getUseHTML4Entities();
    fDropSpanIds = fOptions.getDropHtmlSpanIds();
}
123:
124: /**
125: * Get the default OutputOptions for a document formatter with this
126: * formatter. The encoding will not be set, which signals to use the
127: * default encoding.
128: */
129: static OutputOptions getDefaultOutputOptions() {
130: return new OutputOptions(); // Nothing special
131: }
132:
133: /**
134: * @see ExtBaseDOMFormatter#getCharacterEntity
135: */
136: protected final String getCharacterEntity(char textChar) {
137: if (fUseHTML4Entities) {
138: return HTMLEntities.charToEntity4(textChar);
139: } else {
140: return HTMLEntities.charToEntity(textChar);
141: }
142: }
143:
144: /**
145: * Determine if an attribute's value should be printer. Those that don't
146: * normally have values only get them if one was explictly supplied.
147: */
148: private boolean printableAttrValue(Attr attr) {
149: return (!(HTMLElements.isBooleanAttr(attr.getName())));
150: }
151:
152: /**
153: * Output the DOCTYPE declaration, if the information is available.
154: */
155: private void outputDocType(Document document) throws IOException {
156: //FIXME: Don't currently have a way of getting doctype from the parser
157: //to here, the only way is via the outputOptions override.
158: if ((fPublicId != null) || (fSystemId != null)) {
159: fOut.write("<!DOCTYPE html");
160: if (fPublicId != null) {
161: fOut.write(" PUBLIC \"");
162: fOut.write(fPublicId);
163: fOut.write("\"");
164: }
165: if (fSystemId != null) {
166: if (fPublicId == null) {
167: fOut.write(" SYSTEM");
168: }
169: fOut.write(" \"");
170: fOut.write(fSystemId);
171: fOut.write("\"");
172: }
173: fOut.write('>');
174: writeln();
175: }
176: }
177:
178: /**
179: * Handler called for Document nodes.
180: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocument
181: */
182: public void handleDocument(Document document) throws IOException {
183: if (!fOptions.getOmitDocType()) {
184: outputDocType(document);
185: }
186: fTraverser.processChildren(document);
187: }
188:
189: /**
190: * Handler called for Document nodes; should never be called.
191: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocumentType
192: */
193: public void handleDocumentType(DocumentType documentType)
194: throws IOException {
195: throw new XMLIOError("Unexpected call to handleDocumentType");
196: }
197:
198: /**
199: * Handler called for DocumentFragment nodes; just process children.
200: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocumentFragment
201: */
202: public void handleDocumentFragment(DocumentFragment documentFragment) {
203: fTraverser.processChildren(documentFragment);
204: }
205:
206: /**
207: * Handler called for Attr nodes.
208: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleAttr
209: */
210: public void handleAttr(Attr attr) throws IOException {
211: String name = attr.getName();
212: if (!(fDropThisId && name.equals("id"))) {
213: fOut.write(' ');
214: fOut.write(name);
215: if (printableAttrValue(attr)) {
216: writeAttributeValue(attr);
217: }
218: }
219: }
220:
221: /**
222: * Write an element open tag. The hasChildren option is ignored.
223: */
224: protected final void writeOpenTag(Element element, String tagName,
225: boolean hasChildren) throws IOException {
226: String formattedTag = null;
227: if (fPrettyPrinting) {
228: if (fNextSiblingText) {
229: fOut.write('\n');
230: }
231: fNextSiblingText = (element.getNextSibling() instanceof Text);
232: } // end of if ()
233:
234: if (fUsePreFormattedElements
235: && (element instanceof PreFormattedText)) {
236: formattedTag = ((PreFormattedText) element)
237: .getPreFormattedText();
238: }
239: if (formattedTag != null) {
240: fOut.write(formattedTag);
241: fPreFormattedElementCount++;
242: } else {
243: if (fPrettyPrinting
244: && !(element.getPreviousSibling() instanceof Text)) {
245: printIndent();
246: } // end of if ()
247:
248: fDropThisId = fDropSpanIds && tagName.equals("SPAN");
249: fOut.write('<');
250: fOut.write(tagName);
251: fTraverser.processAttributes(element);
252: fOut.write('>');
253: fDynamicFormattedElementCount++;
254: if (fPrettyPrinting
255: && !(element.getFirstChild() instanceof Text)) {
256: fOut.write('\n');
257: } // end of if ()
258: }
259: }
260:
261: /**
262: * Write an element close tag.
263: */
264: private void writeCloseTag(String tagName) throws IOException {
265: // Output end tag when legal.
266: if (fHandleText) {
267: fHandleText = false;
268: } else {
269: printIndent();
270: } // end of else
271:
272: if (HTMLElements.hasCloseTag(tagName)) {
273: fOut.write("</");
274: fOut.write(tagName);
275: fOut.write('>');
276: }
277: if (fPrettyPrinting && !fNextSiblingText) {
278: fOut.write('\n');
279: } // end of if ()
280: }
281:
282: /**
283: * Handler called for Element nodes.
284: * <p>
285: * This optionally corrects problem cases for browsers:
286: * <UL>
287: * <LI> ID attributes are dropped from SPAN tags. This cause Internet
288: * Explorer 4.0 to get confused on keep-alive connections.
289: * </UL>
290: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleElement
291: */
292: public void handleElement(Element element) throws IOException {
293: String tagName = element.getTagName();
294:
295: // Start Barracuda Kludge ======
296: // check the element to see if it contains a
297: // attribute "visdom".
298: // (org.enhydra.barracuda.core.comp.BComponent.VISIBILITY_MARKER)
299: // This controls DOM visibility. If this value exists and does not
300: // match to true, don't print this particular node.
301: // Note: This should be made generic, but for now...
302: Attr attr = DOMAccess.accessAttribute(fDocument, element, null,
303: "visdom");
304: if (attr != null
305: && !(Boolean.valueOf(attr.getValue()).booleanValue()))
306: return;
307: // End Barracuda Kludge ======
308:
309: // HTML version doesn't care if it has childrne.
310: writeOpenTag(element, tagName, false);
311:
312: // Output childern and close
313: boolean isScriptStyle = HTMLElements.isScriptStyle(element);
314: if (isScriptStyle) {
315: fNoFormatNestCount++;
316: }
317: fTraverser.processChildren(element);
318: if (isScriptStyle) {
319: fNoFormatNestCount--;
320: }
321: writeCloseTag(tagName);
322: }
323:
324: /**
325: * Handler called for ProcessingInstruction nodes.
326: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleProcessingInstruction
327: */
328: public void handleProcessingInstruction(ProcessingInstruction pi)
329: throws IOException {
330: throw new XMLIOError(
331: "Unexpected call to handleProcessingInstruction");
332: }
333:
334: /**
335: * Handler called for CDATASection nodes.
336: * Non-standard extension: outputs data as-is.
337: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleCDATASection
338: */
339: public void handleCDATASection(CDATASection cdata)
340: throws IOException {
341: fOut.write(cdata.getData());
342: }
343:
344: /**
345: * Handler called for Text nodes.
346: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleText
347: */
348: public final void handleText(Text text) throws IOException {
349: fHandleText = true;
350: if (fNoFormatNestCount > 0) {
351: fOut.write(text.getData());
352: } else {
353: super .handleText(text);
354: }
355: }
356:
357: public byte[] toBytes(Node document) {
358: ByteArrayOutputStream out = new ByteArrayOutputStream();
359: try {
360: write(document, out);
361: } catch (IOException e) {
362: // TODO Auto-generated catch block
363: e.printStackTrace();
364: }
365: return out.toByteArray();
366: }
367:
368: /**
369: * Output a document or any node and its children to a OutputStream.
370: */
371: public void write(Node node, OutputStream out) throws IOException {
372: Formatter formatter = getFormatter(node, fOptions, false);
373: Writer writer = new BufferedWriter(new OutputStreamWriter(out,
374: formatter.getMIMEEncoding()));
375: formatter.write(node, writer);
376: writer.flush();
377: }
378:
379: /*
380: * Factory for Formatter objects base on the document type.
381: * This is currently used only my XMLC, this interface may change in the
382: * future.
383: */
384: public static Formatter getFormatter(Node node,
385: OutputOptions outputOptions, boolean forPreFormatting) {
386: return new ExtHTMLFormatter(node, outputOptions,
387: forPreFormatting);
388: }
389:
390: }
|