001: /*
002: * This file is part of PFIXCORE.
003: *
004: * PFIXCORE is free software; you can redistribute it and/or modify
005: * it under the terms of the GNU Lesser General Public License as published by
006: * the Free Software Foundation; either version 2 of the License, or
007: * (at your option) any later version.
008: *
009: * PFIXCORE is distributed in the hope that it will be useful,
010: * but WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
012: * GNU Lesser General Public License for more details.
013: *
014: * You should have received a copy of the GNU Lesser General Public License
015: * along with PFIXCORE; if not, write to the Free Software
016: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
017: *
018: */
019: package de.schlund.pfixxml.util;
020:
021: import java.io.ByteArrayOutputStream;
022: import java.io.File;
023: import java.io.FileOutputStream;
024: import java.io.IOException;
025: import java.io.InputStream;
026: import java.io.OutputStream;
027: import java.io.StringReader;
028: import java.io.StringWriter;
029: import java.io.Writer;
030: import java.net.MalformedURLException;
031: import java.util.Iterator;
032: import java.util.Map;
033:
034: import javax.xml.parsers.DocumentBuilder;
035: import javax.xml.parsers.DocumentBuilderFactory;
036: import javax.xml.parsers.ParserConfigurationException;
037: import javax.xml.transform.OutputKeys;
038: import javax.xml.transform.Result;
039: import javax.xml.transform.Source;
040: import javax.xml.transform.Transformer;
041: import javax.xml.transform.TransformerException;
042: import javax.xml.transform.dom.DOMSource;
043: import javax.xml.transform.sax.SAXSource;
044: import javax.xml.transform.stream.StreamResult;
045:
046: import org.apache.log4j.Logger;
047: import org.w3c.dom.Comment;
048: import org.w3c.dom.Document;
049: import org.w3c.dom.Element;
050: import org.w3c.dom.Node;
051: import org.w3c.dom.Text;
052: import org.xml.sax.ErrorHandler;
053: import org.xml.sax.InputSource;
054: import org.xml.sax.SAXException;
055: import org.xml.sax.SAXParseException;
056: import org.xml.sax.XMLReader;
057:
058: import com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl;
059: import com.sun.org.apache.xerces.internal.parsers.SAXParser;
060:
061: import de.schlund.pfixxml.SPDocument;
062: import de.schlund.pfixxml.resources.FileResource;
063:
064: public class Xml {
065:
066: static final Logger CAT = Logger.getLogger(Xml.class);
067: private static final DocumentBuilderFactory factory = createDocumentBuilderFactory();
068:
069: //-- this is where you configure the xml parser:
070:
071: public static XMLReader createXMLReader() {
072: XMLReader reader;
073: reader = new SAXParser();
074: reader.setErrorHandler(ERROR_HANDLER);
075: return reader;
076: }
077:
078: public static DocumentBuilder createDocumentBuilder() {
079: DocumentBuilder result;
080: try {
081: result = factory.newDocumentBuilder();
082: } catch (ParserConfigurationException e) {
083: throw new RuntimeException("createDocumentBuilder failed",
084: e);
085: }
086: result.setErrorHandler(ERROR_HANDLER);
087: return result;
088: }
089:
090: public static Document createDocument() {
091: return createDocumentBuilder().newDocument();
092: }
093:
094: //-- parse immutable
095:
096: public static Document parseString(XsltVersion xsltVersion,
097: String str) throws TransformerException {
098: SAXSource src = new SAXSource(createXMLReader(),
099: new InputSource(new StringReader(str)));
100: return parse(xsltVersion, src);
101: }
102:
103: /**
104: * Convert the document implementation which is used for write-access
105: * by {@link SPDocument} to the document implementation which is used
106: * by the XSLTProcessor. Note: Currently we convert here from a mutable
107: * DOM implementation to an imutable TinyTree(saxon).
108: * @param doc the document as source for conversion(mostly a Node implementation
109: * when using xerces)
110: * @return a document as result of conversion(currently saxons TinyDocumentImpl)
111: * @throws Exception on all errors
112: */
113: public static Document parse(XsltVersion xsltVersion, Document doc) {
114: if (XsltProvider.getXmlSupport(xsltVersion).isInternalDOM(doc)) {
115: return doc;
116: } else {
117: DOMSource domsrc = new DOMSource(doc);
118: try {
119: return parse(xsltVersion, domsrc);
120: } catch (TransformerException e) {
121: throw new RuntimeException(
122: "a dom tree is always well-formed xml", e);
123: }
124: }
125: }
126:
127: public static Document parse(XsltVersion xsltVersion,
128: FileResource file) throws TransformerException {
129: SAXSource src;
130: try {
131: src = new SAXSource(createXMLReader(), new InputSource(file
132: .toURL().toString()));
133: } catch (MalformedURLException e) {
134: throw new TransformerException(
135: "Cannot create URL for input file: "
136: + file.toString(), e);
137: }
138: return parse(xsltVersion, src);
139: }
140:
141: /**
142: * Create a document from a sourcefile in the filesystem.
143: * @param path the path to the source file in the filesystem
144: * @return the created document(currenly saxons TinyDocumentImpl)
145: * @throws TransformerException on errors
146: */
147: public static Document parse(XsltVersion xsltVersion, File file)
148: throws TransformerException {
149: SAXSource src = new SAXSource(createXMLReader(),
150: new InputSource(toUri(file)));
151: return parse(xsltVersion, src);
152: }
153:
154: private static String toUri(File file) {
155: // TODO: file.toURI returns single-slash.uri ...
156: return "file://" + file.getAbsolutePath();
157: }
158:
159: public static Document parse(XsltVersion xsltVersion, Source input)
160: throws TransformerException {
161: try {
162: Document doc = XsltProvider.getXmlSupport(xsltVersion)
163: .createInternalDOM(input);
164: return doc;
165: } catch (TransformerException e) {
166: StringBuffer sb = new StringBuffer();
167: sb.append("TransformerException in xmlObjectFromDisc!\n");
168: sb.append("Path: ").append(input.getSystemId())
169: .append("\n");
170: sb.append("Message and Location: ").append(e.getMessage())
171: .append("\n");
172: Throwable cause = e.getException();
173: sb.append("Cause: ").append(
174: (cause != null) ? cause.getMessage() : "none")
175: .append("\n");
176: CAT.error(sb.toString());
177: throw e;
178: }
179: }
180:
181: //-- parse mutable
182:
183: public static Document parseStringMutable(String text)
184: throws SAXException {
185: try {
186: return parseMutable(new InputSource(new StringReader(text)));
187: } catch (IOException e) {
188: throw new RuntimeException(
189: "unexpected ioexception while reading from memory",
190: e);
191: }
192: }
193:
194: public static Document parseMutable(FileResource file)
195: throws IOException, SAXException {
196: if (file.isDirectory()) {
197: // otherwise, I get obscure content-not-allowed-here exceptions
198: throw new IOException("expected file, got directory: "
199: + file);
200: }
201: return parseMutable(new InputSource(file.toURL().toString()));
202: }
203:
204: public static Document parseMutable(File file) throws IOException,
205: SAXException {
206: if (file.isDirectory()) {
207: // otherwise, I get obscure content-not-allowed-here exceptions
208: throw new IOException("expected file, got directory: "
209: + file);
210: }
211: return parseMutable(new InputSource(toUri(file)));
212: }
213:
214: public static Document parseMutable(String filename)
215: throws IOException, SAXException {
216: return parseMutable(new File(filename));
217: }
218:
219: public static Document parseMutable(InputStream src)
220: throws IOException, SAXException {
221: return parseMutable(new InputSource(src));
222: }
223:
224: public static Document parseMutable(InputSource src)
225: throws IOException, SAXException {
226: try {
227: return createDocumentBuilder().parse(src);
228: } catch (SAXParseException e) {
229: StringBuffer buf = new StringBuffer(100);
230: buf.append("Caught SAXParseException!\n");
231: buf.append(" Message : ").append(e.getMessage()).append(
232: "\n");
233: buf.append(" SystemID : ").append(e.getSystemId()).append(
234: "\n");
235: buf.append(" Line : ").append(e.getLineNumber())
236: .append("\n");
237: buf.append(" Column : ").append(e.getColumnNumber())
238: .append("\n");
239: CAT.error(buf.toString(), e);
240: throw e;
241: } catch (SAXException e) {
242: StringBuffer buf = new StringBuffer(100);
243: buf.append("Caught SAXException!\n");
244: buf.append(" Message : ").append(e.getMessage()).append(
245: "\n");
246: buf.append(" SystemID : ").append(src.getSystemId())
247: .append("\n");
248: CAT.error(buf.toString(), e);
249: throw e;
250: } catch (IOException e) {
251: StringBuffer buf = new StringBuffer(100);
252: buf.append("Caught IOException!\n");
253: buf.append(" Message : ").append(e.getMessage()).append(
254: "\n");
255: buf.append(" SystemID : ").append(src.getSystemId())
256: .append("\n");
257: CAT.error(buf.toString(), e);
258: throw e;
259: }
260: }
261:
262: //-- serialization
263:
264: /**
265: * @param pp pretty print
266: */
267: public static String serialize(Node node, boolean pp, boolean decl) {
268: StringWriter dest;
269:
270: dest = new StringWriter();
271: try {
272: doSerialize(node, dest, pp, decl);
273: } catch (IOException e) {
274: throw new RuntimeException(
275: "unexpected IOException while writing to memory", e);
276: }
277: return dest.getBuffer().toString();
278: }
279:
280: public static void serialize(Node node, FileResource file,
281: boolean pp, boolean decl) throws IOException {
282: ByteArrayOutputStream tmp = new ByteArrayOutputStream();
283:
284: serialize(node, tmp, pp, decl);
285:
286: OutputStream dest = file.getOutputStream();
287: dest.write(tmp.toByteArray());
288: dest.close();
289: }
290:
291: /**
292: * @param pp pretty print
293: */
294: public static void serialize(Node node, File file, boolean pp,
295: boolean decl) throws IOException {
296: serialize(node, file.getPath(), pp, decl);
297: }
298:
299: /**
300: * @param pp pretty print
301: */
302: public static void serialize(Node node, String filename,
303: boolean pp, boolean decl) throws IOException {
304: FileOutputStream dest;
305:
306: if (node == null) {
307: throw new IllegalArgumentException(
308: "The parameter 'null' is not allowed here! "
309: + "Can't serialize a null node to a file!");
310: }
311: if (filename == null || filename.equals("")) {
312: throw new IllegalArgumentException(
313: "The parameter 'null' or '\"\"' is not allowed here! "
314: + "Can't serialize a document to "
315: + filename + "!");
316: }
317:
318: File finalfile = new File(filename);
319: File tmpfile = new File(finalfile.getParentFile(), ".#"
320: + finalfile.getName() + ".tmp");
321: dest = new FileOutputStream(tmpfile);
322:
323: doSerialize(node, dest, pp, true);
324:
325: // We append a newline because most editors do so and we want to avoid cvs conflicts.
326: // Note: trailing whitespace is removed when parsing a file, so
327: // it's save to append it here without checking for exiting newlines.
328: dest.write('\n');
329:
330: dest.close();
331: if (!tmpfile.renameTo(finalfile)) {
332: throw new RuntimeException(
333: "Could not rename temporary file '" + tmpfile
334: + "' to file '" + finalfile + "'!");
335: }
336: }
337:
338: public static void serialize(Node node, OutputStream dest,
339: boolean pp, boolean decl) throws IOException {
340: doSerialize(node, dest, pp, decl);
341: }
342:
343: // PRIVATE
344:
345: private static DocumentBuilderFactory createDocumentBuilderFactory() {
346: DocumentBuilderFactory fact = new DocumentBuilderFactoryImpl();
347: if (!fact.isNamespaceAware()) {
348: fact.setNamespaceAware(true);
349: }
350: if (fact.isValidating()) {
351: fact.setValidating(false);
352: }
353: return fact;
354: }
355:
356: // make sure that output is not polluted by prinlns:
357: private static final ErrorHandler ERROR_HANDLER = new ErrorHandler() {
358: public void error(SAXParseException exception)
359: throws SAXException {
360: report(exception);
361: }
362:
363: public void fatalError(SAXParseException exception)
364: throws SAXException {
365: report(exception);
366: }
367:
368: public void warning(SAXParseException exception)
369: throws SAXException {
370: report(exception);
371: }
372:
373: private void report(SAXParseException exception)
374: throws SAXException {
375: CAT.error(exception.getSystemId() + ":"
376: + exception.getLineNumber() + ":"
377: + exception.getColumnNumber() + ":"
378: + exception.getMessage());
379: throw exception;
380: }
381: };
382:
383: private static final String ENCODING = "ISO-8859-1";
384:
385: /**
386: * @param pp pretty print
387: */
388: private static void doSerialize(Node node, Object dest, boolean pp,
389: boolean decl) throws IOException {
390: if (node == null) {
391: throw new IllegalArgumentException(
392: "The parameter 'null' is not allowed here! "
393: + "Can't serialize a null node!");
394: }
395: Transformer t;
396: Result result;
397: Throwable cause;
398: DOMSource src;
399:
400: // TODO: remove special cases
401: if (node instanceof Text) {
402: write(((Text) node).getData(), dest);
403: return;
404: } else if (node instanceof Comment) {
405: write("<!--" + ((Comment) node).getData() + "-->", dest);
406: return;
407: }
408:
409: if (!(node instanceof Document) && !(node instanceof Element)) {
410: throw new IllegalArgumentException(
411: "unsupported node type: " + node.getClass());
412: }
413:
414: XsltVersion xsltVersion = getXsltVersion(node);
415: if (xsltVersion == null)
416: xsltVersion = XsltProvider.getPreferredXsltVersion();
417:
418: if (pp) {
419: t = Xslt.createPrettyPrinter(xsltVersion);
420: } else {
421: t = Xslt.createIdentityTransformer(xsltVersion);
422: }
423: t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION,
424: decl ? "no" : "yes");
425: if (decl) {
426: t.setOutputProperty(OutputKeys.ENCODING, ENCODING);
427: } else {
428: // don't set encoding, I'd force an xml decl by setting it.
429: }
430: t.setOutputProperty(OutputKeys.INDENT, pp ? "yes" : "no");
431: t.setOutputProperty(XsltProvider.getXmlSupport(xsltVersion)
432: .getIndentOutputKey(), "2");
433:
434: src = new DOMSource(wrap(node));
435: if (dest instanceof Writer) {
436: result = new StreamResult((Writer) dest);
437: } else if (dest instanceof OutputStream) {
438: result = new StreamResult((OutputStream) dest);
439: } else {
440: throw new RuntimeException(
441: "Only Writer or OutputStreams allowed: "
442: + dest.getClass());
443: }
444: try {
445: t.transform(src, result);
446: } catch (TransformerException e) {
447: cause = e.getCause();
448: if (cause instanceof IOException) {
449: throw (IOException) cause;
450: } else {
451: throw new RuntimeException(
452: "unexpected problem with identity transformer",
453: e);
454: }
455: }
456: }
457:
458: private static void write(String str, Object dest)
459: throws IOException {
460: if (dest instanceof Writer) {
461: ((Writer) dest).write(str);
462: } else {
463: ((OutputStream) dest).write(str.getBytes(ENCODING));
464: }
465: }
466:
467: private static Document wrap(Node node) {
468: // ugly hack to work-around saxon limitation: 6.5.3 cannot run xslt on sub-trees:
469: // solved in 7.7: http://saxon.sourceforge.net/saxon7.7/changes.html (see 'jaxp changes')
470:
471: // TODO: implicit namespace attributes in tiny-tree nodes might vanish
472: Document doc;
473:
474: if (node instanceof Document) {
475: doc = (Document) node;
476: } else {
477: doc = Xml.createDocument();
478: doc.appendChild(doc.importNode(node, true));
479: }
480: return doc;
481: }
482:
483: public static String stripElement(String ele) {
484: int start;
485: int end;
486:
487: if (ele.startsWith("<?")) {
488: throw new IllegalArgumentException(ele);
489: }
490: if (!ele.startsWith("<")) {
491: throw new IllegalArgumentException(ele);
492: }
493: if (!ele.endsWith(">")) {
494: throw new IllegalArgumentException(ele);
495: }
496: if (ele.endsWith("/>")) {
497: return "";
498: }
499: start = ele.indexOf('>');
500: if (start == -1) {
501: throw new IllegalArgumentException(ele);
502: }
503: end = ele.lastIndexOf('<');
504: if (end == -1) {
505: throw new IllegalArgumentException(ele);
506: }
507: return ele.substring(start + 1, end);
508: }
509:
510: public static XsltVersion getXsltVersion(Node node) {
511: Iterator<Map.Entry<XsltVersion, XmlSupport>> it = XsltProvider
512: .getXmlSupport().entrySet().iterator();
513: while (it.hasNext()) {
514: Map.Entry<XsltVersion, XmlSupport> entry = it.next();
515: if (entry.getValue().isInternalDOM(node))
516: return entry.getKey();
517: }
518: return null;
519: }
520:
521: }
|