001: /* ====================================================================
002: * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
003: *
004: * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions
008: * are met:
009: *
010: * 1. Redistributions of source code must retain the above copyright
011: * notice, this list of conditions and the following disclaimer.
012: *
013: * 2. Redistributions in binary form must reproduce the above copyright
014: * notice, this list of conditions and the following disclaimer in
015: * the documentation and/or other materials provided with the
016: * distribution.
017: *
018: * 3. The end-user documentation included with the redistribution,
019: * if any, must include the following acknowledgment:
020: * "This product includes software developed by Jcorporate Ltd.
021: * (http://www.jcorporate.com/)."
022: * Alternately, this acknowledgment may appear in the software itself,
023: * if and wherever such third-party acknowledgments normally appear.
024: *
025: * 4. "Jcorporate" and product names such as "Expresso" must
026: * not be used to endorse or promote products derived from this
027: * software without prior written permission. For written permission,
028: * please contact info@jcorporate.com.
029: *
030: * 5. Products derived from this software may not be called "Expresso",
031: * or other Jcorporate product names; nor may "Expresso" or other
032: * Jcorporate product names appear in their name, without prior
033: * written permission of Jcorporate Ltd.
034: *
035: * 6. No product derived from this software may compete in the same
036: * market space, i.e. framework, without prior written permission
037: * of Jcorporate Ltd. For written permission, please contact
038: * partners@jcorporate.com.
039: *
040: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
041: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
042: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
043: * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
044: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
045: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
046: * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
047: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
048: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
049: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
050: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
051: * SUCH DAMAGE.
052: * ====================================================================
053: *
054: * This software consists of voluntary contributions made by many
055: * individuals on behalf of the Jcorporate Ltd. Contributions back
056: * to the project(s) are encouraged when you make modifications.
057: * Please send them to support@jcorporate.com. For more information
058: * on Jcorporate Ltd. and its products, please see
059: * <http://www.jcorporate.com/>.
060: *
061: * Portions of this software are based upon other open source
062: * products and are subject to their respective licenses.
063: */
064:
065: package com.jcorporate.expresso.ext.report;
066:
067: import com.jcorporate.expresso.core.misc.StringUtil;
068: import com.jcorporate.expresso.kernel.util.FastStringBuffer;
069: import org.w3c.dom.Attr;
070: import org.w3c.dom.DocumentType;
071: import org.w3c.dom.Entity;
072: import org.w3c.dom.NamedNodeMap;
073: import org.w3c.dom.Node;
074: import org.w3c.dom.NodeList;
075: import org.xml.sax.SAXException;
076: import org.xml.sax.SAXParseException;
077:
078: import java.io.IOException;
079: import java.io.StringWriter;
080: import java.io.Writer;
081: import java.util.Arrays;
082: import java.util.HashSet;
083: import java.util.Set;
084:
085: /**
086: * A (DOM) parser that understands the docbook schema and prints it nicely.
087: *
088: * @author David Lloyd
089: */
090:
091: public class DocBookPrinter extends XMLPrinter {
092:
093: /**
094: * The maximum character count (including indentions) allowed for single line text.
095: */
096: protected int _maxFormatLen = 70;
097: /**
098: * Set of inline tags.
099: */
100: protected Set _inlineTagSet = null;
101:
102: public DocBookPrinter() {
103: setIndent('\t', 1);
104:
105: setOmitXmlDecl(true);
106:
107: setWriter(SYSTEM_OUT);
108:
109: _inlineTagSet = new HashSet(Arrays.asList(getInlineTags()));
110: }
111:
112: /**
113: * Return the given node as a string. If any error occurs
114: * in processing (likely an io exception from outputDocument()),
115: * null will be returned.
116: */
117: public static String nodeToString(Node node) {
118: StringWriter writer = new StringWriter();
119: try {
120: nodeToString(node, writer);
121: return writer.getBuffer().toString();
122: } catch (IOException e) {
123: return null;
124: }
125: }
126:
127: /**
128: * Print the given node to a writer.
129: */
130: public static void nodeToString(Node node, Writer writer)
131: throws IOException {
132: DocBookPrinter printer = new DocBookPrinter();
133: printer.setWriter(writer);
134: printer.outputDocument(node);
135: }
136:
137: /**
138: * Override to provide tag relative intelligence.
139: */
140: protected void printTree(Node node) throws IOException {
141: int nodeType = -1;
142:
143: if (node != null) {
144: nodeType = node.getNodeType();
145: switch (nodeType) {
146: case Node.DOCUMENT_NODE: {
147: NodeList nodes = node.getChildNodes();
148:
149: if (nodes != null) {
150: for (int i = 0; i < nodes.getLength(); i++) {
151: printTree(nodes.item(i));
152: }
153: }
154:
155: break;
156: }
157:
158: case Node.ELEMENT_NODE: {
159: String name = node.getNodeName();
160:
161: boolean inlineTag = isInlineTag(name);
162: if (inlineTag) {
163: print("<" + name);
164: } else {
165: print("<" + name, getIndent(this ._column));
166: }
167:
168: NamedNodeMap attributes = node.getAttributes();
169: for (int i = 0; i < attributes.getLength(); i++) {
170: Attr current = (Attr) attributes.item(i);
171:
172: /*
173: With some DOM implementations the default value shows up in addition to a
174: specified value so you get duplicate attributes. We will only write
175: specified attributes.
176: */
177: if (current.getSpecified() == true) {
178: this .print(" " + current.getNodeName() + "='"
179: + toXML(current.getNodeValue(), true)
180: + "'");
181: }
182:
183: }//for attrs
184:
185: if (!node.hasChildNodes()) {
186: // Close opening tag, because no children
187: this .println(" />");
188: } else {
189: // Close opening tag normally to account for children
190: this .print(">");
191:
192: NodeList children = node.getChildNodes();
193:
194: // If a child is a text node, we don't want to print carriage returns that get picked
195: // up as string text
196: boolean hasChildElements = false;
197: if (children != null) {
198: int len = children.getLength();
199: for (int i = 0; i < len; i++) {
200: if (children.item(i).getNodeType() != Node.TEXT_NODE
201: && children.item(i).getNodeType() != Node.CDATA_SECTION_NODE
202: && (children.item(i).getNodeType() != Node.ELEMENT_NODE || !isInlineTag(children
203: .item(i)))) {
204: hasChildElements = true;
205: break;
206: }
207: }
208: }
209:
210: // If non-text node as child, we can print enter
211: if (!inlineTag && hasChildElements) {
212: this .println("");
213: }
214:
215: this ._column++;
216:
217: boolean preTag = isPreTag(name);
218: boolean paraTag = isParaTag(name);
219:
220: boolean isSpecialTag = preTag || paraTag;
221:
222: FastStringBuffer fsb = new FastStringBuffer(1024);
223:
224: for (int i = 0; i < children.getLength(); i++) {
225: if (inlineTag) {
226: fsb.append(nodeToString(children.item(i)));
227: } else if (!isSpecialTag
228: || children.item(i).getNodeType() != Node.TEXT_NODE) {
229: Node child = children.item(i);
230:
231: if (child.getNodeType() == Node.COMMENT_NODE
232: || isInlineTag(child)) {
233: fsb.append(nodeToString(child));
234: } else {
235: if (fsb.length() > 0) {
236: if (printFormatted(fsb.toString())) {
237: hasChildElements = true;
238: } else {
239: println("");
240: }
241: fsb.clear();
242: }
243: printTree(child);
244: }
245: } else if (paraTag) {
246: String value = toXML(children.item(i)
247: .getNodeValue(), false);
248: fsb.append(value);
249: } else if (preTag) {
250: if (children.item(i).getNodeValue() != null
251: && children.item(i).getNodeValue()
252: .trim().length() > 0) {
253: if (fsb.length() > 0) {
254: if (printFormatted(fsb.toString())) {
255: hasChildElements = true;
256: }
257: fsb.clear();
258: }
259:
260: print("<![CDATA[");
261: print(children.item(i).getNodeValue()
262: .trim());
263: print("]]>");
264: hasChildElements = true;
265: }
266: }
267: }
268:
269: if (printFormatted(fsb.toString())) {
270: hasChildElements = true;
271: }
272: fsb.setLength(0);
273:
274: this ._column--;
275:
276: // Write closing tag. Once again for text nodes treat differently
277: if (hasChildElements && !inlineTag && !preTag) {
278: this .println("</" + name + ">",
279: getIndent(this ._column));
280: } else if (!inlineTag) {
281: this .println("</" + name + ">");
282: } else {
283: print("</" + name + ">");
284: }
285: }
286:
287: break;
288: }
289:
290: case Node.TEXT_NODE: {
291: String nodeValue = node.getNodeValue().trim();
292: if (!nodeValue.equals("")) {
293: // Normalize string
294: this .print(toXML(nodeValue, false));
295: }
296: break;
297: }
298:
299: case Node.CDATA_SECTION_NODE: {
300: if (node.getNodeValue() != null
301: && node.getNodeValue().trim().length() > 0) {
302: this
303: .print("<![CDATA[" /*, getIndent( this._column ) */);
304: this .print(convertNewline(node.getNodeValue()));
305: this .print("]]>");
306: }
307: break;
308: }
309:
310: case Node.PROCESSING_INSTRUCTION_NODE: {
311: if (node.getNodeName() != null) {
312: if (!_omitXmlDecl
313: && (false == node.getNodeName().startsWith(
314: "xml"))
315: && (false == node.getNodeName().startsWith(
316: "xsl"))) {
317: // This should NOT be correct, but Xerces seems to have a bug - bt 4/2001
318: this .println("<?xml " + node.getNodeName()
319: + "=\"" + node.getNodeValue() + "\"?>");
320: } else {
321: // This should be the normal behaviour
322: if (!_omitXmlDecl
323: || "xml".equals(node.getNodeName())) {
324: this
325: .println("<?" + node.getNodeName()
326: + " " + node.getNodeValue()
327: + " ?>");
328: }
329: }
330: }
331: break;
332: }
333:
334: case Node.ENTITY_REFERENCE_NODE: {
335: this .println("&" + node.getNodeName() + ";");
336: break;
337: }
338:
339: case Node.COMMENT_NODE: {
340: print("<!--" + node.getNodeValue() + "-->");
341: break;
342: }
343:
344: case Node.DOCUMENT_TYPE_NODE: {
345: DocumentType docType = (DocumentType) node;
346:
347: // Note: below is since DOM 2
348: // Print either SYSTEM '...' or PUBLIC '...' '...'
349: this .print("<!DOCTYPE " + docType.getName());
350: if (docType.getPublicId() != null) {
351: this .print(" PUBLIC ");
352: } else if (docType.getSystemId() != null) {
353: this .print(" SYSTEM ");
354: }
355: // There may not even be a public or system, that's OK
356:
357: if (docType.getPublicId() != null) {
358:
359: this .print("\"" + docType.getPublicId() + "\" ");
360: }
361: if (docType.getSystemId() != null) {
362: this .print("\"" + docType.getSystemId() + "\" ");
363: }
364:
365: // Also print any entities that were defined, such as [<!ENTITY lt "<" >]
366: NamedNodeMap nodes = docType.getEntities();
367:
368: for (int i = 0; i < nodes.getLength(); i++) {
369: this .println("");
370: Entity entity = (Entity) nodes.item(i);
371: this .print(" [<!ENTITY " + entity.getNodeName()
372: + " ");
373:
374: // Entity should have a child node that is its value
375: NodeList children = entity.getChildNodes();
376: if (children != null && children.getLength() > 0) {
377: this .print("\""
378: + XMLPrinter.nodeToString(children
379: .item(0)) + "\">]");
380: } else {
381: this .print("\"" + entity.getNodeValue()
382: + "\">]");
383: }
384: }
385:
386: // End the doctype entry
387: this .println("");
388: this .println(">");
389:
390: break;
391: }
392: }
393:
394: }
395:
396: this ._out.flush();
397: }
398:
399: /**
400: * @return True if the tag name is a docbook pre-formatted tag.
401: */
402: protected boolean isPreTag(String tagName) {
403: return "programlisting".equals(tagName)
404: || "programlistingco".equals(tagName)
405: || "screen".equals(tagName)
406: || "screenco".equals(tagName)
407: || "literallayout".equals(tagName);
408: }
409:
410: /**
411: * @return True if the node is a docbook pre-formatted tag.
412: */
413: protected boolean isPreTag(Node node) {
414: return node != null && node.getNodeType() == Node.ELEMENT_NODE
415: && isPreTag(node.getNodeName());
416: }
417:
418: /**
419: * @return True if the tag name is a docbook paragraph tag.
420: */
421: protected boolean isParaTag(String tagName) {
422: return "para".equals(tagName);
423: }
424:
425: /**
426: * @return True if the node is a docbook paragraph tag.
427: */
428: protected boolean isParaTag(Node node) {
429: return node != null && node.getNodeType() == Node.ELEMENT_NODE
430: && isParaTag(node.getNodeName());
431: }
432:
433: /**
434: * @return True if the node is a docbook inline tag.
435: */
436: protected boolean isInlineTag(Node node) {
437: return node != null
438: && node.getNodeType() == Node.ELEMENT_NODE
439: && (isInlineTag(node.getNodeName()) || isPreTag(node
440: .getParentNode()));
441: }
442:
443: /**
444: * @return True if the tag name is a docbook inline tag.
445: */
446: protected boolean isInlineTag(String tagName) {
447: return _inlineTagSet.contains(tagName);
448: }
449:
450: /**
451: * @return The list of inline tags.
452: */
453: protected String[] getInlineTags() {
454: return new String[] { "emphasis", "inlinegraphic", "link",
455: "olink", "ulink", "phrase", "sgmltag", "subscript",
456: "superscript", "symbol", "trademark", "wordasword",
457: "xref", };
458: }
459:
460: /**
461: * @return The virtual area occupied by a indentation level.
462: */
463: protected int getIndentLength(int col) {
464: return _indentLength * col;
465: }
466:
467: /**
468: * Print the text formatted for a tag that does not place significance on
469: * multiple whitespace. This will layout the text nicely for viewing, short
470: * text is placed inline while long text is formatted into a block. The
471: * threshhold is defined by _maxFormatLen.
472: * An example is docbook's <para>.
473: *
474: * @param value The text to print.
475: */
476: protected boolean printFormatted(String value) throws IOException {
477: if (value == null || value.length() == 0) {
478: return false;
479: }
480: value = formatParaText(value.trim());
481: int len = getIndentLength(_column) + value.length();
482: if (len > _maxFormatLen) {
483: println("");
484: println(value);
485: return true;
486: } else {
487: print(value);
488: }
489: return false;
490: }
491:
492: /**
493: * Print the text formatted for a tag that does not place significance on
494: * multiple whitespace. This will layout the text into a block good for viewing.
495: * An example is docbook's <para>.
496: *
497: * @param text The text to print.
498: */
499: protected String formatParaText(String text) {
500: FastStringBuffer fsb = new FastStringBuffer(1024);
501:
502: //convert MSDOS crlf to a single space
503: text = StringUtil.replace(text, "\r\n", " ");
504: //convert lf to a single space now that crlf will not duplicate this
505: text = StringUtil.replace(text, "\n", " ");
506: //convert tab to a space
507: text = StringUtil.replace(text, "\t", " ");
508:
509: //get rid of duplicate spaces
510: int textlen = text.length();
511: do {
512: int oldlen = textlen;
513: text = StringUtil.replace(text, " ", " "); //replace two spaces with one
514: textlen = text.length();
515: if (oldlen == textlen) {
516: break;
517: }
518: } while (true);
519:
520: int charCount = 0;
521:
522: for (int i = 0; i < textlen; i++) {
523: char c = text.charAt(i);
524:
525: if (charCount >= _maxFormatLen) {
526: if (Character.isWhitespace(c)) // break the line here
527: {
528: fsb.append(_newline);
529: } else {
530: fsb.append(c);
531:
532: int j = i + 1;
533: for (; j < textlen; j++) {
534: c = text.charAt(j);
535: if (Character.isWhitespace(c)) { // break the line here
536: fsb.append(_newline);
537: break;
538: } else {
539: fsb.append(c);
540: }
541: }
542: i = j;
543: }
544: charCount = 0;
545: } else {
546: fsb.append(c);
547: charCount++;
548: }
549: }
550: return fsb.toString();
551: }
552:
553: public static int run(String[] args) {
554: org.apache.log4j.BasicConfigurator.configure();
555:
556: XMLPrinter printer = new DocBookPrinter();
557:
558: return run(args, printer);
559: }
560:
561: public static void main(String[] args) {
562: System.exit(run(args));
563: }
564:
565: //
566: // ErrorHandler methods
567: //
568: /**
569: * Issue a warning on parsing errors
570: *
571: * @param ex A Sax Parse Exception event
572: */
573: public void warning(SAXParseException ex) {
574: log.warn(getLocationString(ex) + ": " + ex.getMessage());
575: }
576:
577: /**
578: * Issue an error
579: *
580: * @param ex A Sax Parse Exception event
581: */
582: public void error(SAXParseException ex) {
583: log.error(getLocationString(ex) + ": " + ex.getMessage());
584: }
585:
586: /**
587: * Fatal error. Used Internally for parsing only
588: *
589: * @param ex A Sax Parse Exception event
590: * @throws SAXException after logging the Parsing Exception
591: */
592: public void fatalError(SAXParseException ex) throws SAXException {
593: log.error(getLocationString(ex) + ": " + ex.getMessage());
594: throw ex;
595: }
596:
597: /**
598: * Returns a string of the location. Used Internally For Parsing Only
599: *
600: * @param ex A Sax Parse Exception event
601: * @return java.lang.String
602: */
603: private String getLocationString(SAXParseException ex) {
604: FastStringBuffer str = new FastStringBuffer(128);
605: String systemId = ex.getSystemId();
606:
607: if (systemId != null) {
608: int index = systemId.lastIndexOf('/');
609:
610: if (index != -1) {
611: systemId = systemId.substring(index + 1);
612: }
613:
614: str.append(systemId);
615: }
616:
617: str.append(':');
618: str.append(ex.getLineNumber());
619: str.append(':');
620: str.append(ex.getColumnNumber());
621:
622: return str.toString();
623: } // getLocationString(SAXParseException):String
624: }
|