001: /*
002: * Enhydra Java Application Server Project
003: *
004: * The contents of this file are subject to the Enhydra Public License
005: * Version 1.1 (the "License"); you may not use this file except in
006: * compliance with the License. You may obtain a copy of the License on
007: * the Enhydra web site ( http://www.enhydra.org/ ).
008: *
009: * Software distributed under the License is distributed on an "AS IS"
010: * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
011: * the License for the specific terms governing rights and limitations
012: * under the License.
013: *
014: * The Initial Developer of the Enhydra Application Server is Lutris
015: * Technologies, Inc. The Enhydra Application Server and portions created
016: * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
017: * All Rights Reserved.
018: *
019: * Contributor(s):
020: *
021: * $Id: ExtBaseDOMFormatter.java,v 1.1 2007-10-19 10:35:48 sinisa Exp $
022: */
023:
024: package org.enhydra.xml.io;
025:
026: import java.io.IOException;
027: import java.io.StringWriter;
028: import java.io.Writer;
029:
030: import org.enhydra.xml.dom.DOMAccess;
031: import org.enhydra.xml.dom.DOMError;
032: import org.enhydra.xml.dom.DOMOps;
033: import org.enhydra.xml.dom.DOMTraversal;
034: import org.enhydra.xml.xmlc.XMLObject;
035: import org.enhydra.xml.xmlc.XMLObjectLink;
036: import org.w3c.dom.Attr;
037: import org.w3c.dom.Comment;
038: import org.w3c.dom.Document;
039: import org.w3c.dom.DocumentType;
040: import org.w3c.dom.Element;
041: import org.w3c.dom.Entity;
042: import org.w3c.dom.EntityReference;
043: import org.w3c.dom.Node;
044: import org.w3c.dom.Notation;
045: import org.w3c.dom.Text;
046:
047: /**
048: * Base class with functionality common to XML and HTML DOM
049: * formatting.
050: */
051: abstract class ExtBaseDOMFormatter implements Formatter,
052: DOMTraversal.Handler {
053: // N.B. A lot of fields are made final to aid in optimization.
054:
055: /**
056: * Maximum character value in an entity quick-check table.
057: */
058: protected static final int MAX_ENTITY_QUICK_CHECK_CHAR = 0x7f;
059:
060: /**
061: * Quote character to use for attribute values.
062: */
063: protected static final char ATTR_QUOTE_CHAR = '"';
064:
065: /**
066: * Entity reference for attribute value quote character.
067: */
068: protected static final String ATTR_QUOTE_CHAR_ENTITY_REF = "#34";
069:
070: /**
071: * The output options.
072: */
073: protected final OutputOptions fOptions;
074:
075: /**
076: * Output writer.
077: */
078: protected Writer fOut;
079:
080: /**
081: * DOM traverser.
082: */
083: protected final DOMTraversal fTraverser;
084:
085: /**
086: * Should character entity references be omitted from attribute values?
087: */
088: private final boolean fOmitAttributeCharEntityRefs;
089:
090: /**
091: * Are we doing pre-formatting?
092: */
093: protected final boolean fPreFormatMode;
094:
095: /**
096: * Should pre-formatted text be written. This is only enabled if the
097: * document implements PreFormattedTextDocument and the preformatted
098: * encoding matches the output encoding. This option only controls
099: * text nodes that are not children of attributes.
100: */
101: protected final boolean fUsePreFormattedText;
102:
103: /**
104: * Should pre-formatted text be used for descendants of attribute
105: * nodes. A separate flag is required to support the
106: * omitAttributeCharEntityRefs option.
107: */
108: protected final boolean fUsePreFormattedAttrText;
109:
110: /**
111: * Should pre-formatted Elements be written. This is the same as
112: * fUsePreFormattedText, unless a URLRewriter is present, in which
113: * case it's false since we need to check the attributes for URLs.
114: */
115: protected final boolean fUsePreFormattedElements;
116:
117: /**
118: * Count of preformatted text nodes that were written.
119: */
120: private int fPreFormattedTextCount;
121:
122: /**
123: * Count of text nodes that were written not using preformatted text.
124: */
125: private int fDynamicFormattedTextCount;
126:
127: /**
128: * Count of preformatted element open tags that were written.
129: */
130: protected int fPreFormattedElementCount;
131:
132: /**
133: * Count of element open tags that were written not using preformatted
134: * text.
135: */
136: protected int fDynamicFormattedElementCount;
137:
138: /**
139: * Are we currently outputting an attribute or its children.
140: */
141: private boolean fProcessingAttr;
142:
143: /**
144: * Are we pretty-printing?
145: */
146: protected final boolean fPrettyPrinting;
147:
148: /**
149: * Pretty-printing indent size.
150: */
151: private final int fIndentSize;
152:
153: /**
154: * The encoding for the current document.
155: */
156: private final String fEncoding;
157:
158: /**
159: * The maximum value of a Unicode character in the document's encoding.
160: */
161: private final int fMaxCharacterValue;
162:
163: /**
164: * The character set object for the encoding.
165: */
166: private final CharacterSet fCharSet;
167:
168: /**
169: * Object to do URL value rewriting, or null if URL rewriting is
170: * not being done.
171: */
172: private final URLRewriter fURLRewriter;
173:
174: /**
175: * Document being formatted.
176: */
177: protected final Document fDocument;
178:
179: /**
180: * DocumentType from document.
181: */
182: protected final DocumentType fDocType;
183:
184: /**
185: * Public id to use for DOCTYPE, or null if none.
186: */
187: protected final String fPublicId;
188:
189: /**
190: * System id to use for DOCTYPE, or null if none.
191: */
192: protected final String fSystemId;
193:
194: /**
195: * If the document implements DocumentInfo, this is set. It is
196: * used by URL rewriting.
197: */
198: private final DocumentInfo fDocInfo;
199:
200: /**
201: * Entity quick-check table, indexed by character value, indicating if the
202: * character must be represented as a character entity reference.
203: */
204: private final boolean[] fEntityQuickCheck;
205:
206: /**
207: * Platform line separator.
208: */
209: private static final String fNewLine;
210:
211: /**
212: * Static string that is used as a quick way to get a certain indent
213: * level. Indent strings are generated by taking substrings of this
214: * string. This string is grown as needed.
215: */
216: private static String fIndentSource = " ";
217:
/**
 * Class initialization: look up the platform line separator once and
 * fail class loading if the system property is not defined.
 */
static {
    String separator = System.getProperty("line.separator");
    if (separator == null) {
        throw new XMLIOError(
                "System property line.separator not found");
    }
    fNewLine = separator;
}
228:
229: /**
230: * Check if preformatted text might be used for this document.
231: *
232: * @return The output options that were used for preformatting or null
233: * if preformatted text can be used for some reason.
234: */
235: private static OutputOptions checkUsePreformatting(Document doc,
236: String defaultEncoding, CharacterSet charSet) {
237: // Document must implement PreFormattedTextDocument
238: if (!(doc instanceof PreFormattedTextDocument)) {
239: return null;
240: }
241:
242: // OutputOptions used for preformatting must be available
243: OutputOptions pfOptions = ((PreFormattedTextDocument) doc)
244: .getPreFormatOutputOptions();
245: if (pfOptions == null) {
246: return null;
247: }
248:
249: // Make sure that the two encodings are compatible in terms of which
250: // characters are converted to character entity references.
251: String preFormatEncoding = pfOptions.getMIMEEncoding();
252: if (preFormatEncoding == null) {
253: preFormatEncoding = defaultEncoding;
254: }
255: Encodings encodings = Encodings.getEncodings();
256: CharacterSet preFormatCharSet = encodings
257: .getCharacterSet(preFormatEncoding);
258: if (!charSet.isCompatible(preFormatCharSet)) {
259: return null; // not compatible
260: }
261: return pfOptions;
262: }
263:
264: /**
265: * If preformatting is enabled, determine if text nodes should
266: * use preformatted text.
267: */
268: private static boolean checkUsePreformattedText(
269: OutputOptions options, OutputOptions pfOptions) {
270: // Only pretty-printing must be the same
271: return (pfOptions.getIndentSize() == options.getIndentSize())
272: && (pfOptions.getPrettyPrinting() == options
273: .getPrettyPrinting());
274: }
275:
276: /**
277: * If preformatting is enabled, determine if attribute value text nodes
278: * should use preformatted text.
279: */
280: private static boolean checkUsePreformattedAttrText(
281: OutputOptions options, OutputOptions pfOptions) {
282: return (pfOptions.getOmitAttributeCharEntityRefs() == options
283: .getOmitAttributeCharEntityRefs());
284: }
285:
286: /**
287: * If preformatting is enabled, determine if preformatted element open
288: * tags should be used.
289: */
290: private static boolean checkUsePreformattedElements(
291: OutputOptions options, OutputOptions pfOptions,
292: URLRewriter urlRewriter) {
293: return (pfOptions.getDropHtmlSpanIds() == options
294: .getDropHtmlSpanIds())
295: && (pfOptions.getOmitAttributeCharEntityRefs() == options
296: .getOmitAttributeCharEntityRefs())
297: && (urlRewriter == null);
298: }
299:
300: /**
301: * Get the DocumentInfo object, if available.
302: * This is our extension
303: */
304: public static DocumentInfo findDocumentInfo(Document document) {
305: return new DocumentInfo() {
306: public boolean isURLAttribute(Element element,
307: String attrName) {
308: return org.enhydra.xml.xmlc.dom.HTMLDomFactoryMethods
309: .isURLAttribute(element, attrName);
310: }
311: };
312: }
313:
314: /**
315: * Get the encoding to use. If one is not explictly specified in
316: * output options, see if one can be obtained from the document.
317: * If that fails, use the default for this type of document.
318: */
319: private static String getEncoding(Document document,
320: OutputOptions outputOptions, String defaultEncoding) {
321: String encoding = outputOptions.getMIMEEncoding();
322: if (encoding == null) {
323: // Try getting from the document
324: if (document instanceof XMLObject) {
325: encoding = ((XMLObject) document).getEncoding();
326: } else if (document instanceof XMLObjectLink) {
327: encoding = ((XMLObjectLink) document).getXMLObject()
328: .getEncoding();
329: }
330: if (encoding == null) {
331: // ok, must use default for XML or HTML
332: encoding = defaultEncoding;
333: }
334: }
335: return encoding;
336: }
337:
338: /**
339: * Constructor.
340: *
341: * @param node Any node of the document that this formatter will be
342: * associated with. This can also be an XMLC Document object (XMLObject).
343: * @param options The output options.
344: * @param defaultEncoding The default encoding for this format.
345: * @param forPreFormatting Is this going to be used for preformatting?
346: * @param entityQuickCheck Document-type specific table that provides
347: * a quick check of the need to encode that character as a character
348: * entity reference. This table MUST include the double-quote character,
349: * as it it used to quote attribute values.
350: */
351: protected ExtBaseDOMFormatter(Node node,
352: OutputOptions outputOptions, boolean forPreFormatting,
353: String defaultEncoding, boolean[] entityQuickCheck) {
354: fDocument = DOMOps.getDocument(node);
355: fDocType = DOMAccess.accessDocumentType(fDocument);
356: fOptions = outputOptions;
357: fTraverser = DOMTraversal.getTraverser(this , 0, node);
358:
359: // Get DOCTYPE information (and overrides from OutputOptions)
360: String publicId = fOptions.getPublicId();
361: String systemId = fOptions.getSystemId();
362: if (fDocType != null) {
363: if (fDocType.getPublicId() != null) {
364: publicId = fDocType.getPublicId();
365: }
366: if (fDocType.getSystemId() != null) {
367: systemId = fDocType.getSystemId();
368: }
369: }
370: fPublicId = publicId;
371: fSystemId = systemId;
372:
373: // Initialize the encoding information.
374: fEncoding = getEncoding(fDocument, outputOptions,
375: defaultEncoding);
376: Encodings encodings = Encodings.getEncodings();
377: fMaxCharacterValue = encodings.getMaxCharacterValue(fEncoding);
378: fCharSet = encodings.getCharacterSet(fEncoding);
379:
380: // Setup URL rewriting, which needs DocumentInfo
381: fDocInfo = findDocumentInfo(fDocument);
382: fURLRewriter = (fDocInfo != null) ? fOptions.getURLRewriter()
383: : null;
384:
385: // Various options from output options.
386: fIndentSize = fOptions.getIndentSize();
387: fPrettyPrinting = (fIndentSize > 0)
388: && fOptions.getPrettyPrinting();
389: fOmitAttributeCharEntityRefs = fOptions
390: .getOmitAttributeCharEntityRefs();
391:
392: // Initialize preformatted text options. This is all rather tricky.
393: OutputOptions pfOptions = checkUsePreformatting(fDocument,
394: defaultEncoding, fCharSet);
395:
396: if ((pfOptions != null) && (!forPreFormatting)) {
397: fUsePreFormattedText = checkUsePreformattedText(fOptions,
398: pfOptions);
399: fUsePreFormattedAttrText = checkUsePreformattedAttrText(
400: fOptions, pfOptions);
401: fUsePreFormattedElements = checkUsePreformattedElements(
402: fOptions, pfOptions, fURLRewriter);
403: } else {
404: fUsePreFormattedText = false;
405: fUsePreFormattedAttrText = false;
406: fUsePreFormattedElements = false;
407: }
408: fPreFormatMode = forPreFormatting;
409:
410: // Entity handling
411: fEntityQuickCheck = entityQuickCheck;
412: if (!fOptions.getUseAposEntity())
413: fEntityQuickCheck['\''] = false;
414:
415: // initialize for pre-formatting.
416: if (forPreFormatting) {
417: fOut = new StringWriter(4096); // bigger than default
418: }
419: }
420:
421: /**
422: * @see Formatter#getMIMEEncoding
423: */
424: public final String getMIMEEncoding() {
425: return fEncoding;
426: }
427:
428: /**
429: * @see Formatter#usedPreFormattedText
430: */
431: public boolean usedPreFormattedText() {
432: return fUsePreFormattedText;
433: }
434:
435: /**
436: * Get the count of preformatted text nodes that were written.
437: */
438: public final int getPreFormattedTextCount() {
439: return fPreFormattedTextCount;
440: }
441:
442: /**
443: * Get the count of text nodes that were written not using
444: * preformatted text.
445: */
446: public final int getDynamicFormattedTextCount() {
447: return fDynamicFormattedTextCount;
448: }
449:
450: /**
451: * @see Formatter#usedPreFormattedElements
452: */
453: public final boolean usedPreFormattedElements() {
454: return fUsePreFormattedElements;
455: }
456:
457: /**
458: * Get the count of preformatted element open tags that were written.
459: */
460: public final int getPreFormattedElementCount() {
461: return fPreFormattedElementCount;
462: }
463:
464: /**
465: * Get the count of element open tags that were written not using
466: * preformatted text.
467: */
468: public final int getDynamicFormattedElementCount() {
469: return fDynamicFormattedElementCount;
470: }
471:
472: /**
473: * Write a newline
474: */
475: protected final void writeln() throws IOException {
476: fOut.write(fNewLine);
477: }
478:
479: /**
480: * Grow indentation source string to at least the specified size
481: * if needed.
482: */
483: private static void ensureIndentSource(int numChars) {
484: // Grow as needed; coded to avoid sync
485: while (fIndentSource.length() < numChars) {
486: fIndentSource += fIndentSource;
487: }
488: }
489:
490: /**
491: * Print indentation to the current level.
492: */
493: protected final void printIndent() throws IOException {
494: if (fPrettyPrinting) {
495: int indent = (fTraverser.getDepth() - 1) * fIndentSize;
496: ensureIndentSource(indent);
497: fOut.write(fIndentSource, 0, indent);
498: } // end of if ()
499: }
500:
501: /**
502: * Get a character entity name for a character.
503: * This is the slow-path, so its ok this is an abstract method call.
504: * @return The character entity name, or null if this character doesn't
505: * have one.
506: */
507: abstract protected String getCharacterEntity(char textChar);
508:
509: /**
510: * Output a character, possibly substituting a character entity reference
511: * or a numeric entity. This is the slow-path method.
512: */
513: private void writeCharacter(char textChar) throws IOException {
514: // Determine the entity to substitute; always do quote, even if substitute disabled,
515: // as this is only implemented for attribute values.
516: String entity;
517: if (fProcessingAttr && fOmitAttributeCharEntityRefs) {
518: entity = (textChar == ATTR_QUOTE_CHAR) ? ATTR_QUOTE_CHAR_ENTITY_REF
519: : null;
520: } else {
521: entity = getCharacterEntity(textChar);
522: }
523:
524: // FIXME: need to review impact of isValid call on performance.
525: // this use to be a simple compare.
526: if (entity != null) {
527: fOut.write('&');
528: fOut.write(entity);
529: fOut.write(';');
530: } else if (!fCharSet.isValid(textChar)) {
531: fOut.write("&#");
532: fOut.write(Integer.toString(textChar));
533: fOut.write(';');
534: } else {
535: fOut.write(textChar);
536: }
537: }
538:
539: /**
540: * Write a text string, encoding document type-specific character entities.
541: * This is an expensive procedure and has been carefully hand optimized.
542: */
543: protected final void writeText(String text) throws IOException {
544: if (text == null)
545: return;
546: int len = text.length();
547: char ch;
548:
549: // FIXME: here we make a nasty assumption about all characters
550: // less than fMaxCharacterValue && MAX_ENTITY_QUICK_CHECK_CHAR
551: // are valid. This is probably ok. The fMaxCharacterValue check
552: // is needed for 7-bit encodings.
553: for (int idx = 0; idx < len; idx++) {
554: ch = text.charAt(idx);
555: if ((ch <= fMaxCharacterValue)
556: && (ch <= MAX_ENTITY_QUICK_CHECK_CHAR)
557: && (!fEntityQuickCheck[ch])) {
558: fOut.write(ch); // Fast path.
559: } else {
560: writeCharacter(ch);
561: }
562: }
563: }
564:
565: /**
566: * Handler called for Entity nodes; should never be called.
567: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleEntity
568: */
569: public final void handleEntity(Entity entity) {
570: throw new XMLIOError("Unexpected call to handleEntity");
571: }
572:
573: /**
574: * Handler called for Notation nodes; should never be called.
575: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleNotation
576: */
577: public final void handleNotation(Notation notation) {
578: throw new XMLIOError("Unexpected call to handleNotation");
579: }
580:
581: /**
582: * Handler called for EntityReference nodes.
583: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleEntityReference
584: */
585: public final void handleEntityReference(EntityReference entityRef)
586: throws IOException {
587: fOut.write('&');
588: fOut.write(entityRef.getNodeName());
589: fOut.write(';');
590: }
591:
592: /**
593: * Handler called for Comment nodes.
594: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleComment
595: */
596: public final void handleComment(Comment comment) throws IOException {
597: fOut.write("<!--");
598: fOut.write(comment.getData());
599: fOut.write("-->");
600: }
601:
602: /**
603: * Handler called for Text nodes.
604: * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleText
605: */
606: public void handleText(Text text) throws IOException {
607: String preformattedData = null;
608: if (fProcessingAttr) {
609: if (fUsePreFormattedAttrText
610: && (text instanceof PreFormattedText)) {
611: preformattedData = ((PreFormattedText) text)
612: .getPreFormattedText();
613: }
614: } else {
615: if (fUsePreFormattedText
616: && (text instanceof PreFormattedText)) {
617: preformattedData = ((PreFormattedText) text)
618: .getPreFormattedText();
619: }
620: }
621:
622: if (preformattedData != null) {
623: // Have preformatted data that can be used.
624: fOut.write(preformattedData);
625: fPreFormattedTextCount++;
626: } else {
627: // No preformatted text.
628: writeText(text.getData());
629: fDynamicFormattedTextCount++;
630: }
631: }
632:
633: /**
634: * Write an attribute value. Convert characters to character entity
635: * references as needed.
636: */
637: protected final void writeAttributeValue(Attr attr)
638: throws IOException {
639: fProcessingAttr = true;
640:
641: fOut.write('=');
642: fOut.write(ATTR_QUOTE_CHAR);
643:
644: // Are we doing URL rewritting and is this a URL attr?
645: if ((fURLRewriter != null)
646: && isURLRewriteAttribute(attr.getOwnerElement(), attr
647: .getName())) {
648: String value = fURLRewriter.rewriteURL(attr.getValue());
649: writeText(value);
650: } else {
651: fTraverser.processChildren(attr);
652: }
653: fOut.write(ATTR_QUOTE_CHAR);
654:
655: // Don't need to reset on error, as its reset at the beginning
656: // of each write.
657: fProcessingAttr = false;
658: }
659:
660: /**
661: * Method to write an open tag, including attributes. Children
662: * are not processed. This is normally called by the derived class
663: * handleElement method, but its hear to allow for use by preformatter.
664: */
665: abstract protected void writeOpenTag(Element element,
666: String tagName, boolean hasChildren) throws IOException;
667:
668: /**
669: * Preformat an element.
670: */
671: private String preFormatElement(Element element) throws IOException {
672: writeOpenTag(element, element.getTagName(), element
673: .hasChildNodes());
674: return ((StringWriter) fOut).getBuffer().toString();
675: }
676:
677: /**
678: * Recursively determine if a node is a child of an attribute.
679: */
680: private boolean isAttributeChild(Node node) {
681: if (node == null) {
682: return false;
683: }
684: switch (node.getNodeType()) {
685: case Node.ELEMENT_NODE:
686: return false;
687: case Node.ATTRIBUTE_NODE:
688: return true;
689: default:
690: return isAttributeChild(node.getParentNode());
691: }
692: }
693:
694: /**
695: * determine whether an attribute should have URL rewriting applied to it
696: *
697: * @param element
698: * @param attrName
699: * @return true if it is a rewrite attribute, false if not
700: */
701: private boolean isURLRewriteAttribute(Element element,
702: String attrName) {
703: if (fOptions.getURLRewriteAttributes() != null) {
704: return fOptions.getURLRewriteAttributes()
705: .contains(attrName);
706: }
707: //fall back to defaults provided by each respective XMLCDomFactory implementation
708: return fDocInfo.isURLAttribute(element, attrName);
709: }
710:
711: /**
712: * Preformat a text node. The preformatted text is returned even if
713: * its the same.
714: */
715: private String preFormatText(Text text) throws IOException {
716: fProcessingAttr = isAttributeChild(text);
717: try {
718: handleText(text);
719: } finally {
720: fProcessingAttr = false;
721: }
722: return ((StringWriter) fOut).getBuffer().toString();
723: }
724:
725: /**
726: * @see Formatter#preFormatNode
727: */
728: public final String preFormatNode(Node node) {
729: fProcessingAttr = false;
730: try {
731: ((StringWriter) fOut).getBuffer().setLength(0); // Clear buffer
732:
733: switch (node.getNodeType()) {
734: case Node.ELEMENT_NODE:
735: return preFormatElement((Element) node);
736: case Node.TEXT_NODE:
737: return preFormatText((Text) node);
738: }
739: return null;
740: } catch (IOException except) {
741: // Should never happen.
742: throw new XMLIOError(except);
743: }
744: }
745:
746: /**
747: * Format a Node and children to the specified writer.
748: * @see Formatter#write
749: */
750: public final void write(Node node, Writer writer)
751: throws IOException {
752: try {
753: fOut = writer;
754: fProcessingAttr = false;
755: fTraverser.traverse(node);
756: //csc_040604_1 - this really causes problems if you are using the DOMWriter repeatedly, to write chunks of the document at a
757: // time, rather than the whole thing at once. By putting a writeln() here, it ends up splitting lines wherever
758: // the chunk finishes, and if that's in between spans surrounding text nodes, it can adversely effect the output
759: // that gets displayed. Consequently, we really need to remove this writeln. What this means is that there will
760: // no longer be a final CR/LF at the end of reports, but I don't think that will actually cause any problems
761: //csc_040604_1 writeln();
762: } catch (DOMError error) {
763: // Rethrow IOExceptions
764: Throwable cause = error.getCause();
765: if (cause instanceof IOException) {
766: throw (IOException) cause;
767: } else {
768: throw error;
769: }
770: }
771: }
772: }
|