001: /*
002: * The contents of this file are subject to the
003: * Mozilla Public License Version 1.1 (the "License");
004: * you may not use this file except in compliance with the License.
005: * You may obtain a copy of the License at http://www.mozilla.org/MPL/
006: *
007: * Software distributed under the License is distributed on an "AS IS"
008: * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.
009: * See the License for the specific language governing rights and
010: * limitations under the License.
011: *
012: * The Initial Developer of the Original Code is Simulacra Media Ltd.
013: * Portions created by Simulacra Media Ltd are Copyright (C) Simulacra Media Ltd, 2004.
014: *
015: * All Rights Reserved.
016: *
017: * Contributor(s):
018: */
019:
020: package org.openharmonise.commons.xml;
021:
022: import java.io.*;
023: import java.util.*;
024: import java.util.logging.*;
025:
026: import org.openharmonise.commons.xml.namespace.*;
027: import org.w3c.dom.*;
028:
029: /**
030: * This class prints XML, formatted so that it is nicely indented and empty elements
031: * are self-closing. It can print to a String or directly to a file, by passing it
032: * either a <code>java.io.File</code> object or a path to the required output file.
033: *
034: * @author Matthew Large
035: */
036:
037: public class XMLPrettyPrint {
038:
/**
 * Resolver consulted for namespace prefixes during output. Remains unset
 * until namespace awareness is switched on or a resolver is supplied.
 */
private NamespaceResolver m_resolver;

/**
 * Whether this printer operates in namespace aware mode.
 */
private boolean m_bNamespaceAware;

/**
 * Logger for this class.
 */
private static final Logger m_logger =
        Logger.getLogger(XMLPrettyPrint.class.getName());

/**
 * Creates a printer that is namespace unaware and holds no resolver.
 */
public XMLPrettyPrint() {
    super();
}
057:
058: /**
059: * Sets whether XMLPrettyPrint will deal with XML Namespaces. If you are setting
060: * XMLPrettyPrint to be Namespace Aware and you have not set a NamespaceResolver
061: * then a XMLPrettyPrint will create a new local one, this can be overridden by
062: * setting your own using setNamespaceResolver.
063: *
064: * @param bNamespaceAware
065: */
066: public void setNamespaceAware(boolean bNamespaceAware) {
067: this .m_bNamespaceAware = bNamespaceAware;
068: if (bNamespaceAware && m_resolver == null) {
069: m_resolver = new LocalNamespaceResolver();
070: }
071: }
072:
073: /**
074: * Method to check if XMLPrettyPrint is set to be Namespace Aware.
075: *
076: * @return True if XMLPrettyPrint will deal with XML Namespaces
077: */
078: public boolean isNamespaceAware() {
079: return this .m_bNamespaceAware;
080: }
081:
082: /**
083: * Sets the NamespaceResolver that XMLPrettyPrint will use when printing namespaced
084: * XML. If the NamespaceResolver that is passed in is not null, XMLPrettyPrint will
085: * automatically be set to be namespace aware. If the NamespaceResolver that is passed in is
086: * null, then XMLPrettyPrint will be set to be namespace unaware and the currently
087: * held resolver (if any) will be removed.
088: *
089: * @param resolver The NamespaceResolver that XMLPrettyPrint is to use
090: */
091: public void setNamespaceResolver(NamespaceResolver resolver) {
092: if (resolver != null) {
093: this .m_resolver = resolver;
094: this .m_bNamespaceAware = true;
095: } else {
096: this .m_resolver = null;
097: this .m_bNamespaceAware = false;
098: }
099: }
100:
101: /**
102: * Accessor to the current NamespaceResolver.
103: *
104: * @return The NamespaceResolver that XMLPrettyPrint will use when printing namespaced XML.
105: */
106: public NamespaceResolver getNamespaceResolver() {
107: return this .m_resolver;
108: }
109:
110: /**
111: * Prints a given DOM Node to a File as specified by a path.
112: *
113: * @param node DOM Node to print
114: * @param sFilepath Path of File to print DOM Node to
115: */
116: public void printNodeToFile(Node node, String sFilepath) {
117: printNodeToFile(node, new File(sFilepath));
118: }
119:
120: /**
121: * Prints a given DOM Node to a given File.
122: *
123: * @param node DOM Node to print
124: * @param file File to print DOM Node to
125: */
126: public void printNodeToFile(Node node, File file) {
127: try {
128: FileOutputStream fos = new FileOutputStream(file);
129: OutputStreamWriter osw = new OutputStreamWriter(fos,
130: "UTF-8");
131:
132: osw.write(printNode(node));
133:
134: osw.close();
135: fos.close();
136: } catch (Exception e) {
137: m_logger.log(Level.WARNING, e.getMessage(), e);
138: }
139: }
140:
141: /**
142: * Prints a given DOM Document to a String.
143: *
144: * @param doc DOM Document to print
145: * @return String with output of printed DOM Node
146: */
147: public String printNode(Document doc)
148: throws NamespaceClashException {
149: StringBuffer sBuff = new StringBuffer(
150: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
151:
152: List aInScopeNamespaces = new ArrayList();
153:
154: int nChildren = doc.getChildNodes().getLength();
155:
156: if (nChildren == 1) {
157: Node firstNode = doc.getFirstChild();
158:
159: if (firstNode.getNodeType() == Node.TEXT_NODE
160: && ((Text) firstNode).getNodeValue().equals("")) {
161:
162: } else if (firstNode.getNodeType() == Node.TEXT_NODE) {
163: sBuff.append(((Text) firstNode).getData());
164:
165: } else {
166: printNode(firstNode, 1, sBuff,
167: (List) ((ArrayList) aInScopeNamespaces).clone());
168: }
169: } else {
170: NodeList nl = doc.getChildNodes();
171:
172: for (int i = 0; i < nl.getLength(); i++) {
173: printNode(nl.item(i), 1, sBuff,
174: (List) ((ArrayList) aInScopeNamespaces).clone());
175: }
176: }
177:
178: return sBuff.toString();
179: }
180:
181: /**
182: * Prints a given DOM Node to a String.
183: *
184: * @param node DOM Node to print
185: * @return String with output of printed DOM Node
186: * @throws NamespaceClashException
187: */
188: public String printNode(Node node) throws NamespaceClashException {
189: return printNode(node, true);
190: }
191:
192: /**
193: * Prints a given DOM Node to a String.
194: *
195: * @param node DOM Node to print
196: * @param includeXMLDeclaration true if the XML declaration should be included at the top of the output
197: * @return String with output of printed DOM Node
198: * @throws NamespaceClashException
199: */
200: public String printNode(Node node, boolean includeXMLDeclaration)
201: throws NamespaceClashException {
202: StringBuffer sBuff = new StringBuffer();
203: if (includeXMLDeclaration) {
204: sBuff
205: .append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
206: }
207:
208: List aInScopeNamespaces = new ArrayList();
209:
210: if (this .m_bNamespaceAware) {
211: if (node.getNodeType() == Node.ELEMENT_NODE
212: && ((Element) node).hasAttribute("xmlns")) {
213: this .m_resolver.addNamespace(((Element) node)
214: .getAttribute("xmlns"), "");
215: }
216: }
217:
218: if (node instanceof Text) {
219: printTextNode((Text) node, sBuff);
220: } else if (node instanceof Comment) {
221: printComment((Comment) node, 0, sBuff);
222: } else {
223: sBuff.append("<").append(
224: this .handleElementName(node, aInScopeNamespaces));
225:
226: if (node instanceof Element) {
227: printAttributes((Element) node, sBuff,
228: aInScopeNamespaces);
229: }
230: int nChildren = node.getChildNodes().getLength();
231: if (nChildren == 0) {
232: sBuff.append("/>");
233: return sBuff.toString();
234: } else if (nChildren == 1) {
235: Node firstNode = node.getFirstChild();
236: if (firstNode.getNodeType() == Node.TEXT_NODE
237: && ((Text) firstNode).getNodeValue().equals("")) {
238: sBuff.append("/>");
239: } else if (firstNode.getNodeType() == Node.TEXT_NODE) {
240: sBuff.append(">").append(
241: ((Text) firstNode).getData()).append("</")
242: .append(node.getNodeName()).append(">");
243: } else {
244: sBuff.append(">\n");
245: printNode(firstNode, 1, sBuff,
246: (List) ((ArrayList) aInScopeNamespaces)
247: .clone());
248: sBuff.append("\n</").append(
249: this .handleElementName(node,
250: aInScopeNamespaces)).append(">");
251: }
252: } else {
253: NodeList nl = node.getChildNodes();
254: sBuff.append(">\n");
255: for (int i = 0; i < nl.getLength(); i++) {
256: printNode(nl.item(i), 1, sBuff,
257: (List) ((ArrayList) aInScopeNamespaces)
258: .clone());
259: }
260: sBuff.append("</").append(
261: this
262: .handleElementName(node,
263: aInScopeNamespaces))
264: .append(">");
265: }
266: }
267: return sBuff.toString();
268: }
269:
270: /**
271: * Internal method to print a DOM Node.
272: *
273: * @param node DOM Node to print
274: * @param nLevel Depth of node in XML tree
275: * @param sBuff StringBuffer to print to
276: * @param aInScopeNamespaces List of namespaces that are currently in scope
277: * @throws NamespaceClashException Thrown from underlying NamespaceResolver
278: */
279: private void printNode(Node node, int nLevel, StringBuffer sBuff,
280: List aInScopeNamespaces) throws NamespaceClashException {
281: if (node.getNodeType() == Node.ELEMENT_NODE) {
282: printElementNode((Element) node, nLevel + 1, sBuff,
283: aInScopeNamespaces);
284: } else if (node.getNodeType() == Node.CDATA_SECTION_NODE) {
285: printCDATA((CDATASection) node, nLevel + 1, sBuff);
286: } else if (node.getNodeType() == Node.TEXT_NODE) {
287: printTextNode((Text) node, sBuff);
288: } else if (node.getNodeType() == Node.COMMENT_NODE) {
289: printComment((Comment) node, nLevel, sBuff);
290: }
291: }
292:
293: /**
294: * Internal method to print a DOM Element.
295: *
296: * @param el DOM Element to print
297: * @param nLevel Depth of element in XML tree
298: * @param sBuff StringBuffer to print to
299: * @param aInScopeNamespaces List of namespaces that are currently in scope
300: * @throws NamespaceClashException Thrown from underlying NamespaceResolver
301: */
302: private void printElementNode(Element el, int nLevel,
303: StringBuffer sBuff, List aInScopeNamespaces)
304: throws NamespaceClashException {
305: NodeList nl = el.getChildNodes();
306: int nChildren = nl.getLength();
307: if (nChildren == 0) {
308: printElementSelfClosing(el, nLevel, true, sBuff,
309: aInScopeNamespaces);
310: sBuff.append("\n");
311: } else if (nChildren == 1) {
312: Node firstnode = el.getFirstChild();
313: if (firstnode.getNodeType() == Node.TEXT_NODE
314: && (((Text) firstnode).getNodeValue() == null || ((Text) firstnode)
315: .getNodeValue().equals(""))) {
316: printElementSelfClosing(el, nLevel, true, sBuff,
317: (List) ((ArrayList) aInScopeNamespaces).clone());
318: sBuff.append("\n");
319: } else if (firstnode.getNodeType() == Node.TEXT_NODE) {
320: List newInScopeNamespaces = (List) ((ArrayList) aInScopeNamespaces)
321: .clone();
322: printElementStart(el, nLevel, true, sBuff,
323: newInScopeNamespaces);
324: printNode(firstnode, nLevel, sBuff,
325: newInScopeNamespaces);
326: printElementEnd(el, nLevel, false, sBuff,
327: newInScopeNamespaces);
328: sBuff.append("\n");
329: } else {
330: List newInScopeNamespaces = (List) ((ArrayList) aInScopeNamespaces)
331: .clone();
332: printElementStart(el, nLevel, true, sBuff,
333: newInScopeNamespaces);
334: sBuff.append("\n");
335: printNode(firstnode, nLevel + 1, sBuff,
336: newInScopeNamespaces);
337: printElementEnd(el, nLevel, true, sBuff,
338: newInScopeNamespaces);
339: sBuff.append("\n");
340: }
341: } else {
342: printElementStart(el, nLevel, true, sBuff,
343: aInScopeNamespaces);
344: sBuff.append("\n");
345: for (int i = 0; i < nChildren; i++) {
346: Node node = nl.item(i);
347: printNode(node, nLevel + 1, sBuff,
348: (List) ((ArrayList) aInScopeNamespaces).clone());
349: }
350: printElementEnd(el, nLevel, true, sBuff, aInScopeNamespaces);
351: sBuff.append("\n");
352: }
353: }
354:
355: /**
356: * Internal method for printing the opening tag of a DOM Element.
357: *
358: * @param el DOM Element for which to print the opening tag
359: * @param nLevel Depth of element in XML tree
360: * @param bTabs Controls if tabs are printed, they wouldn't be required for inline elements
361: * @param sBuff StringBuffer to print to
362: * @param aInScopeNamespaces List of namespaces that are currently in scope
363: * @throws NamespaceClashException Thrown from underlying NamespaceResolver
364: */
365: private void printElementStart(Element el, int nLevel,
366: boolean bTabs, StringBuffer sBuff, List aInScopeNamespaces)
367: throws NamespaceClashException {
368: if (bTabs) {
369: printTabs(nLevel, sBuff);
370: }
371: sBuff.append("<").append(
372: this .handleElementName(el, aInScopeNamespaces));
373: printAttributes(el, sBuff, aInScopeNamespaces);
374: sBuff.append(">");
375: }
376:
377: /**
378: * Internal method for printing the closing tag of a DOM Element.
379: *
380: * @param el DOM Element for which to print the opening tag
381: * @param nLevel Depth of element in XML tree
382: * @param bTabs Controls if tabs are printed, they wouldn't be required for inline elements
383: * @param sBuff StringBuffer to print to
384: * @param aInScopeNamespaces List of namespaces that are currently in scope
385: * @throws NamespaceClashException Thrown from underlying NamespaceResolver
386: */
387: private void printElementEnd(Element el, int nLevel, boolean bTabs,
388: StringBuffer sBuff, List aInScopeNamespaces)
389: throws NamespaceClashException {
390: if (bTabs) {
391: printTabs(nLevel, sBuff);
392: }
393: sBuff.append("</").append(
394: this .handleElementName(el, aInScopeNamespaces)).append(
395: ">");
396: }
397:
398: /**
399: * Internal method for printing the tag of a self-closing DOM Element, i.e. one that does
400: * not have any contents.
401: *
402: * @param el DOM Element for which to print the opening tag
403: * @param nLevel Depth of element in XML tree
404: * @param bTabs Controls if tabs are printed, they wouldn't be required for inline elements
405: * @param sBuff StringBuffer to print to
406: * @param aInScopeNamespaces List of namespaces that are currently in scope
407: * @throws NamespaceClashException Thrown from underlying NamespaceResolver
408: */
409: private void printElementSelfClosing(Element el, int nLevel,
410: boolean bTabs, StringBuffer sBuff, List aInScopeNamespaces)
411: throws NamespaceClashException {
412: if (bTabs) {
413: printTabs(nLevel, sBuff);
414: }
415: sBuff.append("<").append(
416: this .handleElementName(el, aInScopeNamespaces));
417: printAttributes(el, sBuff, aInScopeNamespaces);
418: sBuff.append("/>");
419: }
420:
421: /**
422: * Internal method for printing the DOM Attributes of a DOM Element.
423: *
424: * @param el DOM Element for which to print attributes
425: * @param sBuff StringBuffer to print to
426: * @param aInScopeNamespaces List of namespaces that are currently in scope
427: * @throws NamespaceClashException Thrown from underlying NamespaceResolver
428: */
429: private void printAttributes(Element el, StringBuffer sBuff,
430: List aInScopeNamespaces) throws NamespaceClashException {
431: NamedNodeMap attribs = el.getAttributes();
432: for (int i = 0; i < attribs.getLength(); i++) {
433: Attr nextAtt = (Attr) attribs.item(i);
434: if (!this .m_bNamespaceAware
435: || nextAtt.getNodeName().indexOf("xmlns") == -1) {
436: sBuff.append(" ").append(
437: this .handleAttributeName(nextAtt,
438: aInScopeNamespaces)).append("=\"")
439: .append(this .encodeXMLText(nextAtt.getValue()))
440: .append("\"");
441: }
442: }
443: }
444:
445: /**
446: * Internal method for printing a DOM Text node, wraps up any encoding duties.
447: *
448: * @param txt DOM Text node to print
449: * @param sBuff StringBuffer to print to
450: */
451: private void printTextNode(Text txt, StringBuffer sBuff) {
452: sBuff.append(encodeXMLText(txt.getNodeValue().trim()).trim());
453: }
454:
455: /**
456: * Internal method to print a DOM Element's name with all namespace issues dealt
457: * with.
458: *
459: * @param node DOM Element to deal with
460: * @param aInScopeNamespaces List of namespaces currently in scope
461: * @return Printed element name with any namespace declarations required
462: * @throws NamespaceClashException Thrown from underlying NamespaceResolver
463: */
464: private String handleElementName(Node node, List aInScopeNamespaces)
465: throws NamespaceClashException {
466: String sReturn = "";
467:
468: if (this .m_bNamespaceAware) {
469: if (node.getLocalName() == null) {
470: sReturn = node.getNodeName();
471: } else {
472: if (node.getNamespaceURI() != null) {
473: String sPrefix = this .m_resolver
474: .getPrefixByNode(node);
475: sReturn = sPrefix;
476: if (!sPrefix.equals("")) {
477: sReturn = sReturn + ":";
478: }
479: sReturn = sReturn + node.getLocalName();
480: if (!aInScopeNamespaces.contains(node
481: .getNamespaceURI())) {
482: aInScopeNamespaces.add(node.getNamespaceURI());
483: sReturn = sReturn + " xmlns";
484: if (!sPrefix.equals("")) {
485: sReturn = sReturn + ":";
486: }
487: sReturn = sReturn + sPrefix + "=\""
488: + node.getNamespaceURI() + "\"";
489: }
490: } else {
491: sReturn = node.getLocalName();
492: }
493: }
494: } else {
495: if (node.getLocalName() == null) {
496: sReturn = node.getNodeName();
497: } else {
498: sReturn = node.getLocalName();
499: }
500: }
501:
502: return sReturn;
503: }
504:
505: /**
506: * Internal method to print a DOM Attributes's name with all namespace issues dealt
507: * with.
508: *
509: * @param node DOM Attribute to deal with
510: * @param aInScopeNamespaces List of namespaces currently in scope
511: * @return Printed attribute name with any namespace declarations required
512: * @throws NamespaceClashException Thrown from underlying NamespaceResolver
513: */
514: private String handleAttributeName(Node node,
515: List aInScopeNamespaces) throws NamespaceClashException {
516: String sReturn = "";
517:
518: if (this .m_bNamespaceAware) {
519: if (node.getLocalName() == null) {
520: sReturn = node.getNodeName();
521: } else {
522: if (node.getNamespaceURI() != null) {
523: String sPrefix = this .m_resolver
524: .getPrefixByNode(node);
525: if (!aInScopeNamespaces.contains(node
526: .getNamespaceURI())) {
527: aInScopeNamespaces.add(node.getNamespaceURI());
528: sReturn = "xmlns:" + sPrefix + "=\""
529: + node.getNamespaceURI() + "\" ";
530: }
531: sReturn = sReturn + sPrefix + ":"
532: + node.getLocalName();
533: } else {
534: sReturn = node.getLocalName();
535: }
536: }
537: } else {
538: if (node.getLocalName() == null) {
539: sReturn = node.getNodeName();
540: } else {
541: sReturn = node.getLocalName();
542: }
543: }
544:
545: return sReturn;
546: }
547:
548: /**
549: * Handles XML encoding of text, e.g. & to &.
550: *
551: * @param sText Text to XML encode
552: * @return XML Encoded text
553: */
554: public String encodeXMLText(String sText) {
555: StringBuffer sBuff2 = new StringBuffer(sText);
556: StringBuffer sNewBuff = new StringBuffer();
557:
558: for (int i = 0; i < sBuff2.length(); i++) {
559: char currChar = sBuff2.charAt(i);
560: Character currCharObj = new Character(sBuff2.charAt(i));
561: if (currChar == '&') {
562: if ((sBuff2.length() - 1 - i) >= 4
563: && sBuff2.charAt(i + 1) == 'a'
564: && sBuff2.charAt(i + 2) == 'm'
565: && sBuff2.charAt(i + 3) == 'p'
566: && sBuff2.charAt(i + 4) == ';') {
567: i = i + 4;
568: sNewBuff.append("&");
569: } else {
570: sNewBuff.append("&");
571: }
572: } else if (currChar == '>') {
573: sNewBuff.append(">");
574: } else if (currChar == '<') {
575: sNewBuff.append("<");
576: } else {
577: sNewBuff.append(currChar);
578: }
579: }
580:
581: return sNewBuff.toString();
582: }
583:
584: /**
585: * Handles XML decoding of text, e.g. &amp; to &;.
586: *
587: * @param sText Text to XML decode
588: * @return XML decoded text
589: */
590: public String decodeXMLText(String sText) {
591: StringBuffer sBuff2 = new StringBuffer(sText);
592: StringBuffer sNewBuff = new StringBuffer();
593:
594: for (int i = 0; i < sBuff2.length(); i++) {
595: char currChar = sBuff2.charAt(i);
596: Character currCharObj = new Character(sBuff2.charAt(i));
597: if (currChar == '&') {
598: if (sBuff2.charAt(i + 1) == 'a'
599: && sBuff2.charAt(i + 2) == 'm'
600: && sBuff2.charAt(i + 3) == 'p'
601: && sBuff2.charAt(i + 4) == ';'
602: && sBuff2.charAt(i + 5) == 'a'
603: && sBuff2.charAt(i + 6) == 'm'
604: && sBuff2.charAt(i + 7) == 'p'
605: && sBuff2.charAt(i + 8) == ';') {
606: i = i + 8;
607: sNewBuff.append("&");
608: } else if (sBuff2.charAt(i + 1) == 'a'
609: && sBuff2.charAt(i + 2) == 'm'
610: && sBuff2.charAt(i + 3) == 'p'
611: && sBuff2.charAt(i + 4) == ';') {
612: i = i + 4;
613: sNewBuff.append("&");
614: } else if (currChar == '£') {
615: System.out.println("Decoding pound");
616: sNewBuff.append("£");
617: } else {
618: sNewBuff.append("&");
619: }
620: } else {
621: sNewBuff.append(currChar);
622: }
623: }
624:
625: return sNewBuff.toString();
626: }
627:
628: /**
629: * Internal method to print a DOM CDATA Section.
630: *
631: * @param cdata DOM CDATASection to be printed
632: * @param nLevel Depth of CDATA Section in XML tree
633: * @param sBuff StringBuffer to print to
634: */
635: private void printCDATA(CDATASection cdata, int nLevel,
636: StringBuffer sBuff) {
637: printTabs(nLevel, sBuff);
638: sBuff.append("<![CDATA[").append(cdata.getData()).append(
639: "]]>\n");
640: }
641:
642: /**
643: * Internal method to print a DOM Comment.
644: *
645: * @param comment DOM Comment to be printed
646: * @param nLevel Depth of CDATA Section in XML tree
647: * @param sBuff StringBuffer to print to
648: */
649: private void printComment(Comment comment, int nLevel,
650: StringBuffer sBuff) {
651: printTabs(nLevel, sBuff);
652: sBuff.append("<!--").append(comment.getData()).append("-->\n");
653: }
654:
655: /**
656: * Internal method to print the required number of tabs for pretty printing.
657: *
658: * @param nLevel Number of tab sets to print, i.e. depth in XML tree
659: * @param sBuff StringBuffer to print to
660: */
661: private void printTabs(int nLevel, StringBuffer sBuff) {
662: for (int i = 0; i < nLevel; i++) {
663: sBuff.append(" ");
664: }
665: }
666:
667: }
|