001: /*
002: * Copyright 1999-2005 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package com.lowagie.text.xml;
018:
019: import java.io.OutputStream;
020: import java.io.OutputStreamWriter;
021: import java.io.PrintWriter;
022: import java.io.UnsupportedEncodingException;
023:
024: import org.w3c.dom.Attr;
025: import org.w3c.dom.Document;
026: import org.w3c.dom.DocumentType;
027: import org.w3c.dom.NamedNodeMap;
028: import org.w3c.dom.Node;
029:
/**
 * Serializes a W3C DOM node tree as XML text to a {@link PrintWriter}.
 *
 * <p>An output target must be installed with one of the {@code setOutput}
 * methods before {@link #write(Node)} is called with a non-null node.
 *
 * <p>Two output modes are supported:
 * <ul>
 * <li><b>Regular</b> (default): a document is preceded by an XML declaration
 * and its DOCTYPE (if any); comments, CDATA sections and entity references are
 * written back in their markup form.</li>
 * <li><b>Canonical</b>: the XML declaration, the DOCTYPE and comments are
 * omitted, entity references are replaced by their children, CDATA sections
 * are written as escaped text and line feeds are written as {@code &#xA;}.</li>
 * </ul>
 *
 * <p>In both modes attributes are written sorted by name, and every element is
 * written with an explicit end tag (an empty element is written as
 * {@code <a></a>}).
 *
 * @author psoares
 */
public class XmlDomWriter {

    /** Print writer that receives all output. */
    protected PrintWriter fOut;

    /** Canonical output. */
    protected boolean fCanonical;

    /**
     * Processing XML 1.1 document. Reset to {@code false} each time a
     * document node is written, because version detection is disabled.
     */
    protected boolean fXML11;

    //
    // Constructors
    //

    /** Creates a writer that produces regular (non-canonical) output. */
    public XmlDomWriter() {
    }

    /**
     * Creates a writer with the given output mode.
     *
     * @param canonical {@code true} for canonical output
     */
    public XmlDomWriter(boolean canonical) {
        fCanonical = canonical;
    }

    //
    // Public methods
    //

    /**
     * Sets whether output is canonical.
     *
     * @param canonical {@code true} for canonical output
     */
    public void setCanonical(boolean canonical) {
        fCanonical = canonical;
    }

    /**
     * Sets the output stream for printing.
     *
     * @param stream   the stream that receives the encoded characters
     * @param encoding the charset name, or {@code null} to use {@code "UTF8"}
     * @throws UnsupportedEncodingException if the named encoding is not supported
     */
    public void setOutput(OutputStream stream, String encoding)
            throws UnsupportedEncodingException {
        String charsetName = (encoding == null) ? "UTF8" : encoding;
        // NOTE(review): the XML declaration written for a document always
        // says UTF-8, whatever encoding is chosen here.
        fOut = new PrintWriter(new OutputStreamWriter(stream, charsetName));
    }

    /**
     * Sets the output writer. A {@link PrintWriter} is used as is; any other
     * writer is wrapped in a new {@code PrintWriter}.
     *
     * @param writer the writer that receives the output
     */
    public void setOutput(java.io.Writer writer) {
        if (writer instanceof PrintWriter) {
            fOut = (PrintWriter) writer;
        } else {
            fOut = new PrintWriter(writer);
        }
    }

    /**
     * Writes the specified node, recursively.
     *
     * <p>Document, document type, element, entity reference, CDATA section,
     * text, processing instruction and comment nodes are handled; a
     * {@code null} node or a node of any other type produces no output.
     *
     * @param node the node to write, may be {@code null}
     */
    public void write(Node node) {
        // is there anything to do?
        if (node == null) {
            return;
        }

        switch (node.getNodeType()) {
            case Node.DOCUMENT_NODE:
                writeDocument((Document) node);
                break;
            case Node.DOCUMENT_TYPE_NODE:
                writeDoctype((DocumentType) node);
                break;
            case Node.ELEMENT_NODE:
                writeElement(node);
                break;
            case Node.ENTITY_REFERENCE_NODE:
                writeEntityReference(node);
                break;
            case Node.CDATA_SECTION_NODE:
                writeCData(node);
                break;
            case Node.TEXT_NODE:
                normalizeAndPrint(node.getNodeValue(), false);
                fOut.flush();
                break;
            case Node.PROCESSING_INSTRUCTION_NODE:
                writeProcessingInstruction(node);
                break;
            case Node.COMMENT_NODE:
                writeComment(node);
                break;
            default:
                // other node types are not serialized
                break;
        }
    }

    /** Returns the attributes of the given map as an array sorted by node name. */
    protected Attr[] sortAttributes(NamedNodeMap attrs) {
        int count = (attrs == null) ? 0 : attrs.getLength();
        Attr[] sorted = new Attr[count];
        for (int i = 0; i < count; i++) {
            sorted[i] = (Attr) attrs.item(i);
        }

        // Selection sort: move the smallest remaining name to position i.
        for (int i = 0; i < count - 1; i++) {
            int smallest = i;
            String smallestName = sorted[i].getNodeName();
            for (int j = i + 1; j < count; j++) {
                String candidate = sorted[j].getNodeName();
                if (candidate.compareTo(smallestName) < 0) {
                    smallestName = candidate;
                    smallest = j;
                }
            }
            if (smallest != i) {
                Attr swap = sorted[i];
                sorted[i] = sorted[smallest];
                sorted[smallest] = swap;
            }
        }
        return sorted;
    }

    //
    // Protected methods
    //

    /**
     * Normalizes and prints the given string, one character at a time.
     *
     * @param s          the text to print; {@code null} prints nothing
     * @param isAttValue {@code true} if the text is an attribute value
     */
    protected void normalizeAndPrint(String s, boolean isAttValue) {
        if (s == null) {
            return;
        }
        for (int i = 0, len = s.length(); i < len; i++) {
            normalizeAndPrint(s.charAt(i), isAttValue);
        }
    }

    /**
     * Normalizes and prints the given character.
     *
     * <p>The markup characters {@code <}, {@code >} and {@code &} are always
     * written as entity references; a double quote is escaped only inside an
     * attribute value; a carriage return is always written as a character
     * reference; a line feed is written as a character reference in canonical
     * mode and in attribute values.
     *
     * @param c          the character to print
     * @param isAttValue {@code true} if the character belongs to an attribute value
     */
    protected void normalizeAndPrint(char c, boolean isAttValue) {
        switch (c) {
            case '<':
                fOut.print("&lt;");
                break;
            case '>':
                fOut.print("&gt;");
                break;
            case '&':
                fOut.print("&amp;");
                break;
            case '"':
                // A '"' that appears in character data
                // does not need to be escaped.
                fOut.print(isAttValue ? "&quot;" : "\"");
                break;
            case '\r':
                // If CR is part of the document's content, it
                // must not be printed as a literal otherwise
                // it would be normalized to LF when the document
                // is reparsed.
                fOut.print("&#xD;");
                break;
            case '\n':
                if (fCanonical) {
                    fOut.print("&#xA;");
                } else {
                    printCharOrReference(c, isAttValue);
                }
                break;
            default:
                printCharOrReference(c, isAttValue);
                break;
        }
    }

    //
    // Private helpers
    //

    /**
     * Prints a character that has no dedicated entity, either literally or as
     * a hexadecimal character reference.
     */
    private void printCharOrReference(char c, boolean isAttValue) {
        // In XML 1.1, control chars in the ranges [#x1-#x1F, #x7F-#x9F] must be escaped.
        // NEL (0x85) and LSEP (0x2028) are escaped as well for XML 1.1, since they
        // would be normalized to LF when the document is reparsed.
        boolean xml11Escape = fXML11
                && ((c >= 0x01 && c <= 0x1F && c != 0x09 && c != 0x0A)
                        || (c >= 0x7F && c <= 0x9F)
                        || c == 0x2028);
        // Tab and line feed in an attribute value would be normalized to #x20
        // when the document is reparsed.
        boolean attributeWhitespace = isAttValue && (c == 0x09 || c == 0x0A);

        if (xml11Escape || attributeWhitespace) {
            fOut.print("&#x");
            fOut.print(Integer.toHexString(c).toUpperCase());
            fOut.print(";");
        } else {
            fOut.print(c);
        }
    }

    /** Writes the XML declaration and DOCTYPE (regular mode only), then the root element. */
    private void writeDocument(Document document) {
        // XML version detection is disabled, so the 1.0 declaration is always used.
        fXML11 = false;
        if (!fCanonical) {
            String version = fXML11 ? "1.1" : "1.0";
            fOut.println("<?xml version=\"" + version + "\" encoding=\"UTF-8\"?>");
            fOut.flush();
            write(document.getDoctype());
        }
        write(document.getDocumentElement());
    }

    /** Writes a DOCTYPE declaration including its external ids and internal subset. */
    private void writeDoctype(DocumentType doctype) {
        fOut.print("<!DOCTYPE ");
        fOut.print(doctype.getName());

        String publicId = doctype.getPublicId();
        String systemId = doctype.getSystemId();
        if (publicId != null) {
            fOut.print(" PUBLIC '");
            fOut.print(publicId);
            fOut.print("' '");
            // NOTE(review): a null system id is printed as the text "null" here.
            fOut.print(systemId);
            fOut.print('\'');
        } else if (systemId != null) {
            fOut.print(" SYSTEM '");
            fOut.print(systemId);
            fOut.print('\'');
        }

        String internalSubset = doctype.getInternalSubset();
        if (internalSubset != null) {
            fOut.println(" [");
            fOut.print(internalSubset);
            fOut.print(']');
        }
        fOut.println('>');
    }

    /** Writes an element: start tag with sorted attributes, children, end tag. */
    private void writeElement(Node element) {
        fOut.print('<');
        fOut.print(element.getNodeName());
        Attr[] attrs = sortAttributes(element.getAttributes());
        for (int i = 0; i < attrs.length; i++) {
            fOut.print(' ');
            fOut.print(attrs[i].getNodeName());
            fOut.print("=\"");
            normalizeAndPrint(attrs[i].getNodeValue(), true);
            fOut.print('"');
        }
        fOut.print('>');
        fOut.flush();

        writeChildren(element);

        fOut.print("</");
        fOut.print(element.getNodeName());
        fOut.print('>');
        fOut.flush();
    }

    /** Writes an entity reference, or its expansion when in canonical mode. */
    private void writeEntityReference(Node reference) {
        if (fCanonical) {
            writeChildren(reference);
        } else {
            fOut.print('&');
            fOut.print(reference.getNodeName());
            fOut.print(';');
            fOut.flush();
        }
    }

    /** Writes a CDATA section, or its content as escaped text when in canonical mode. */
    private void writeCData(Node cdata) {
        if (fCanonical) {
            normalizeAndPrint(cdata.getNodeValue(), false);
        } else {
            fOut.print("<![CDATA[");
            fOut.print(cdata.getNodeValue());
            fOut.print("]]>");
        }
        fOut.flush();
    }

    /** Writes a processing instruction; the data part is omitted when null or empty. */
    private void writeProcessingInstruction(Node pi) {
        fOut.print("<?");
        fOut.print(pi.getNodeName());
        String data = pi.getNodeValue();
        if (data != null && data.length() > 0) {
            fOut.print(' ');
            fOut.print(data);
        }
        fOut.print("?>");
        fOut.flush();
    }

    /** Writes a comment; comments are dropped entirely in canonical mode. */
    private void writeComment(Node commentNode) {
        if (fCanonical) {
            return;
        }
        fOut.print("<!--");
        String comment = commentNode.getNodeValue();
        if (comment != null && comment.length() > 0) {
            fOut.print(comment);
        }
        fOut.print("-->");
        fOut.flush();
    }

    /** Writes every child of the given node in document order. */
    private void writeChildren(Node parent) {
        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            write(child);
        }
    }
}
|