001: /*
002: * Enhydra Java Application Server Project
003: *
004: * The contents of this file are subject to the Enhydra Public License
005: * Version 1.1 (the "License"); you may not use this file except in
006: * compliance with the License. You may obtain a copy of the License on
007: * the Enhydra web site ( http://www.enhydra.org/ ).
008: *
009: * Software distributed under the License is distributed on an "AS IS"
010: * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
011: * the License for the specific terms governing rights and limitations
012: * under the License.
013: *
014: * The Initial Developer of the Enhydra Application Server is Lutris
015: * Technologies, Inc. The Enhydra Application Server and portions created
016: * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
017: * All Rights Reserved.
018: *
019: * Contributor(s):
020: *
021: * $Id: ExtXMLFormatter.java,v 1.1 2007-10-19 10:35:48 sinisa Exp $
022: */
023:
024: package org.enhydra.xml.io;
025:
026: import java.io.BufferedWriter;
027: import java.io.ByteArrayOutputStream;
028: import java.io.IOException;
029: import java.io.OutputStream;
030: import java.io.OutputStreamWriter;
031: import java.io.Writer;
032:
033: import org.enhydra.xml.dom.DOMAccess;
034: import org.enhydra.xml.dom.DOMOps;
035: import org.w3c.dom.Attr;
036: import org.w3c.dom.CDATASection;
037: import org.w3c.dom.Document;
038: import org.w3c.dom.DocumentFragment;
039: import org.w3c.dom.DocumentType;
040: import org.w3c.dom.Element;
041: import org.w3c.dom.Node;
042: import org.w3c.dom.ProcessingInstruction;
043: import org.w3c.dom.Text;
044:
045: /*
046: * FIXME:
047: * - Add option to encode entity references from the DTD.
048: * - namespaces
049: * - whitespace
050: * - Need to handle entities for 8bit encodings
051: */
052:
053: /**
054: * Formatter for outputting a HTML DOM as a HTML text document.
055: */
056: public class ExtXMLFormatter extends ExtBaseDOMFormatter implements
057: org.enhydra.util.DOMFormatter {
058: /**
059: * Default XML encoding.
060: */
061: private static final String DEFAULT_XML_ENCODING = "UTF-8";
062:
063: /**
064: * XML version (only 1.0 supported, as thats all there is now).
065: */
066: private static final String XML_VERSION = "1.0";
067:
068: /**
069: * Table use to optimized checking for characters that should be
070: * represented as entity references.
071: */
072: private static final boolean[] fEntityQuickCheck = new boolean[MAX_ENTITY_QUICK_CHECK_CHAR + 1];
073:
074: /**
075: * Indicates then a text has just been handled
076: */
077: private boolean fHandleText = false;
078:
079: /**
080: * Indicates the next Sibling is a Text node
081: */
082: private boolean fNextSiblingText = false;
083:
084: /**
085: * Static constructor.
086: */
087: static {
088: fEntityQuickCheck['<'] = true;
089: fEntityQuickCheck['>'] = true;
090: fEntityQuickCheck['"'] = true;
091: fEntityQuickCheck['\''] = true;
092: fEntityQuickCheck['&'] = true;
093: }
094:
095: /**
096: * Constructor.
097: */
098: public ExtXMLFormatter(Node node, OutputOptions outputOptions,
099: boolean forPreFormatting) {
100: super (node, outputOptions, forPreFormatting,
101: DEFAULT_XML_ENCODING, fEntityQuickCheck);
102: }
103:
104: /**
105: * Get the default OutputOptions for a document formatter with this
106: * formatter. The encoding will not be set, which signals to use the
107: * default encoding.
108: */
109: static OutputOptions getDefaultOutputOptions() {
110: return new OutputOptions(); // Nothing special
111: }
112:
113: /**
114: * @see BaseDOMFormatter#getCharacterEntity
115: */
116: protected final String getCharacterEntity(char textChar) {
117: switch (textChar) {
118: case '<':
119: return "lt";
120: case '>':
121: return "gt";
122: case '"':
123: return "quot";
124: case '\'':
125: return "apos";
126: case '&':
127: return "amp";
128: default:
129: return null;
130: }
131: }
132:
133: /**
134: * Write the XML header.
135: */
136: private void writeXMLHeader(Document document) throws IOException {
137: fOut.write("<?xml version=\"");
138: fOut.write(XML_VERSION);
139: fOut.write("\"");
140:
141: // Encoding
142: if (!fOptions.getOmitEncoding()) {
143: fOut.write(" encoding=\"");
144: fOut.write(getMIMEEncoding());
145: fOut.write('"');
146: }
147:
148: // Standalone - can only determine this if document
149: // supports the method. If not, default to "no"
150: try {
151: if (DOMOps.getStandalone(document)) {
152: fOut.write(" standalone=\"yes\"");
153: }
154: } catch (UnsupportedOperationException e) {
155: // Ignore this - we assume 'standalone="no"' in this case...
156: }
157:
158: fOut.write("?>");
159: writeln();
160: }
161:
162: /**
163: * Handler called for Document nodes; creates the XML file header.
164: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocument
165: */
166: public void handleDocument(Document document) throws IOException {
167: if (!fOptions.getOmitXMLHeader()) {
168: writeXMLHeader(document);
169: }
170: fTraverser.processDocumentType(document);
171: fTraverser.processChildren(document);
172: }
173:
174: /**
175: * Handler called for Document nodes; writes the DOCTYPE specification.
176: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocumentType
177: */
178: public void handleDocumentType(DocumentType documentType)
179: throws IOException {
180: if (fOptions.getOmitDocType()) {
181: return; // Don't output DOCTYPE.
182: }
183: String internalSubset = documentType.getInternalSubset();
184: if ((fPublicId == null) && (fSystemId == null)
185: && (internalSubset == null)) {
186: return; // No DOCTYPE
187: }
188: Element docElement = fDocument.getDocumentElement();
189: if (docElement == null) {
190: throw new XMLIOError(
191: "Document has DocumentType, with out having a root element");
192: }
193:
194: fOut.write("<!DOCTYPE ");
195: fOut.write(docElement.getTagName());
196:
197: if (fPublicId != null) {
198: // Public requires system.
199: if (fSystemId == null) {
200: throw new XMLIOError(
201: "No SYSTEM id to accompany PUBLIC id: "
202: + fPublicId);
203: }
204: fOut.write(" PUBLIC \"");
205: fOut.write(fPublicId);
206: fOut.write("\" \"");
207: fOut.write(fSystemId);
208: fOut.write("\"");
209: } else if (fSystemId != null) {
210: fOut.write(" SYSTEM \"");
211: fOut.write(fSystemId);
212: fOut.write("\"");
213: }
214:
215: if (internalSubset != null) {
216: writeln();
217: fOut.write(" [");
218: fOut.write(internalSubset);
219: fOut.write("]");
220: }
221: fOut.write('>');
222: writeln();
223: }
224:
225: /**
226: * Handler called for DocumentFragment nodes; just process children
227: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocumentFragment
228: */
229: public void handleDocumentFragment(DocumentFragment documentFragment) {
230: fTraverser.processChildren(documentFragment);
231: }
232:
233: /**
234: * Handler called for Attr nodes.
235: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleAttr
236: */
237: public void handleAttr(Attr attr) throws IOException {
238: fOut.write(' ');
239: fOut.write(attr.getName());
240: writeAttributeValue(attr);
241: }
242:
243: /**
244: * Write an element open tag.
245: */
246: protected final void writeOpenTag(Element element, String tagName,
247: boolean hasChildren) throws IOException {
248: String formattedTag = null;
249: if (fPrettyPrinting) {
250: if (fNextSiblingText) {
251: fOut.write('\n');
252: }
253: fNextSiblingText = (element.getNextSibling() instanceof Text);
254: } // end of if ()
255:
256: if (fUsePreFormattedElements
257: && (element instanceof PreFormattedText)) {
258: formattedTag = ((PreFormattedText) element)
259: .getPreFormattedText();
260: }
261: if (formattedTag != null) {
262: fOut.write(formattedTag);
263: fPreFormattedElementCount++;
264: } else {
265: if (fPrettyPrinting
266: && !(element.getPreviousSibling() instanceof Text)) {
267: printIndent();
268: } // end of if ()
269:
270: fOut.write('<');
271: fOut.write(tagName);
272: fTraverser.processAttributes(element);
273: if (!hasChildren
274: && !(fOptions.getEnableXHTMLCompatibility() && !isXHTMLContentModelEmpty(tagName))) {
275: //see http://www.w3.org/TR/xhtml1/#C_2
276: if (fOptions.getEnableXHTMLCompatibility())
277: fOut.write(" /");
278: else
279: fOut.write('/');
280: }
281: fOut.write('>');
282: fDynamicFormattedElementCount++;
283: if (fPrettyPrinting
284: && !(element.getFirstChild() instanceof Text)) {
285: fOut.write('\n');
286: } // end of if ()
287: }
288: }
289:
290: /**
291: * Handler called for Element nodes.
292: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleElement
293: */
294: public void handleElement(Element element) throws IOException {
295: String tagName = element.getTagName();
296:
297: // Start Barracuda Kludge ======
298: // check the element to see if it contains a
299: // attribute "visdom".
300: // (org.enhydra.barracuda.core.comp.BComponent.VISIBILITY_MARKER)
301: // This controls DOM visibility. If this value exists and does not
302: // match to true, don't print this particular node.
303: // Note: This should be made generic, but for now...
304: Attr attr = DOMAccess.accessAttribute(fDocument, element, null,
305: "visdom");
306: if (attr != null
307: && !(Boolean.valueOf(attr.getValue()).booleanValue()))
308: return;
309: // End Barracuda Kludge ======
310:
311: boolean hasChildren = element.hasChildNodes();
312:
313: writeOpenTag(element, tagName, hasChildren);
314:
315: // Process children and close.
316: if (hasChildren
317: || (fOptions.getEnableXHTMLCompatibility() && !isXHTMLContentModelEmpty(tagName))) {
318: fTraverser.processChildren(element);
319:
320: if (fHandleText) {
321: fHandleText = false;
322: } else {
323: printIndent();
324: } // end of else
325:
326: fOut.write("</");
327: fOut.write(tagName);
328: fOut.write('>');
329:
330: if (fPrettyPrinting && !fNextSiblingText) {
331: fOut.write('\n');
332: } // end of if ()
333: }
334: }
335:
336: /**
337: * Handler called for ProcessingInstruction nodes.
338: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleProcessingInstruction
339: */
340: public void handleProcessingInstruction(ProcessingInstruction pi)
341: throws IOException {
342: fOut.write("<?");
343: fOut.write(pi.getTarget());
344: String data = pi.getData();
345: if (data != null) {
346: fOut.write(' ');
347: fOut.write(data);
348: }
349: fOut.write("?>");
350: }
351:
352: /**
353: * Handler called for CDATASection nodes.
354: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleCDATASection
355: */
356: public void handleCDATASection(CDATASection cdata)
357: throws IOException {
358: //FIXME: Should we handle embedded ]]>???
359: fOut.write("<![CDATA[");
360: fOut.write(cdata.getNodeValue());
361: fOut.write("]]>");
362: }
363:
364: /**
365: * Handler called for Text nodes.
366: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleText
367: */
368: public final void handleText(Text text) throws IOException {
369: fHandleText = true;
370: super .handleText(text);
371: }
372:
373: /**
374: * See {@link http://www.w3.org/TR/xhtml1/#C_3}
375: *
376: * <p>Utility method to tell us which XHTML elements contain an empty
377: * content model allowing us to, in the converse, encapsulate the
378: * logic needed to deal with XHTML tags that, if backward compatibility
379: * is requested, should be written with an explicit close tag rather than
380: * use the default minimized syntax when the element has no children.</p>
381: *
382: * @version 2.2
383: */
384: private static boolean isXHTMLContentModelEmpty(String tagName) {
385: String[] emptyTags = { "br", "area", "link", "img", "param",
386: "hr", "input", "col", "base", "meta" };
387: boolean isEmptyTag = false;
388: for (int i = 0; i < emptyTags.length; i++) {
389: if (emptyTags[i].equalsIgnoreCase(tagName)) {
390: isEmptyTag = true;
391: break;
392: }
393: }
394: return isEmptyTag;
395: }
396:
397: public byte[] toBytes(Node document) {
398: ByteArrayOutputStream out = new ByteArrayOutputStream();
399: try {
400: write(document, out);
401: } catch (IOException e) {
402: // TODO Auto-generated catch block
403: e.printStackTrace();
404: }
405: return out.toByteArray();
406: }
407:
408: /**
409: * Output a document or any node and its children to a OutputStream.
410: */
411: public void write(Node node, OutputStream out) throws IOException {
412: Formatter formatter = getFormatter(node, fOptions, false);
413: Writer writer = new BufferedWriter(new OutputStreamWriter(out,
414: formatter.getMIMEEncoding()));
415: formatter.write(node, writer);
416: writer.flush();
417: }
418:
419: /*
420: * Factory for Formatter objects base on the document type.
421: * This is currently used only my XMLC, this interface may change in the
422: * future.
423: */
424: public static Formatter getFormatter(Node node,
425: OutputOptions outputOptions, boolean forPreFormatting) {
426: return new ExtXMLFormatter(node, outputOptions,
427: forPreFormatting);
428: }
429:
430: }
|