001: /*
002: * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
003: *
004: * This software is open source.
005: * See the bottom of this file for the licence.
006: *
007: * $Id: XMPPPacketReader.java 3190 2005-12-12 15:00:46Z gato $
008: */
009:
010: package org.dom4j.io;
011:
012: import org.dom4j.*;
013: import org.jivesoftware.openfire.net.MXParser;
014: import org.xmlpull.v1.XmlPullParser;
015: import org.xmlpull.v1.XmlPullParserException;
016: import org.xmlpull.v1.XmlPullParserFactory;
017:
018: import java.io.*;
019: import java.net.URL;
020:
021: /**
022: * <p><code>XMPPPacketReader</code> is a Reader of DOM4J documents that
023: * uses the fast
024: * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 3.x</a>.
025: * It is very fast for use in SOAP style environments.</p>
026: *
027: * @author <a href="mailto:pelle@neubia.com">Pelle Braendgaard</a>
028: * @author <a href="mailto:jstrachan@apache.org">James Strachan</a>
029: * @version $Revision: 3190 $
030: */
031: public class XMPPPacketReader {
032:
033: /**
034: * <code>DocumentFactory</code> used to create new document objects
035: */
036: private DocumentFactory factory;
037:
038: /**
039: * <code>XmlPullParser</code> used to parse XML
040: */
041: private MXParser xppParser;
042:
043: /**
044: * <code>XmlPullParser</code> used to parse XML
045: */
046: private XmlPullParserFactory xppFactory;
047:
048: /**
049: * DispatchHandler to call when each <code>Element</code> is encountered
050: */
051: private DispatchHandler dispatchHandler;
052:
053: /**
054: * Last time a full Document was read or a heartbeat was received. Hearbeats
055: * are represented as whitespaces received while a Document is not being parsed.
056: */
057: private long lastActive = System.currentTimeMillis();
058:
059: public XMPPPacketReader() {
060: }
061:
062: public XMPPPacketReader(DocumentFactory factory) {
063: this .factory = factory;
064: }
065:
066: /**
067: * <p>Reads a Document from the given <code>File</code></p>
068: *
069: * @param file is the <code>File</code> to read from.
070: * @return the newly created Document instance
071: * @throws DocumentException if an error occurs during parsing.
072: * @throws java.net.MalformedURLException if a URL could not be made for the given File
073: */
074: public Document read(File file) throws DocumentException,
075: IOException, XmlPullParserException {
076: String systemID = file.getAbsolutePath();
077: return read(new BufferedReader(new FileReader(file)), systemID);
078: }
079:
080: /**
081: * <p>Reads a Document from the given <code>URL</code></p>
082: *
083: * @param url <code>URL</code> to read from.
084: * @return the newly created Document instance
085: * @throws DocumentException if an error occurs during parsing.
086: */
087: public Document read(URL url) throws DocumentException,
088: IOException, XmlPullParserException {
089: String systemID = url.toExternalForm();
090: return read(createReader(url.openStream()), systemID);
091: }
092:
093: /**
094: * <p>Reads a Document from the given URL or filename.</p>
095: * <p/>
096: * <p/>
097: * If the systemID contains a <code>':'</code> character then it is
098: * assumed to be a URL otherwise its assumed to be a file name.
099: * If you want finer grained control over this mechansim then please
100: * explicitly pass in either a {@link URL} or a {@link File} instance
101: * instead of a {@link String} to denote the source of the document.
102: * </p>
103: *
104: * @param systemID is a URL for a document or a file name.
105: * @return the newly created Document instance
106: * @throws DocumentException if an error occurs during parsing.
107: * @throws java.net.MalformedURLException if a URL could not be made for the given File
108: */
109: public Document read(String systemID) throws DocumentException,
110: IOException, XmlPullParserException {
111: if (systemID.indexOf(':') >= 0) {
112: // lets assume its a URL
113: return read(new URL(systemID));
114: } else {
115: // lets assume that we are given a file name
116: return read(new File(systemID));
117: }
118: }
119:
120: /**
121: * <p>Reads a Document from the given stream</p>
122: *
123: * @param in <code>InputStream</code> to read from.
124: * @return the newly created Document instance
125: * @throws DocumentException if an error occurs during parsing.
126: */
127: public Document read(InputStream in) throws DocumentException,
128: IOException, XmlPullParserException {
129: return read(createReader(in));
130: }
131:
132: /**
133: * <p>Reads a Document from the given stream</p>
134: *
135: * @param charSet the charSet that the input is encoded in
136: * @param in <code>InputStream</code> to read from.
137: * @return the newly created Document instance
138: * @throws DocumentException if an error occurs during parsing.
139: */
140: public Document read(String charSet, InputStream in)
141: throws DocumentException, IOException,
142: XmlPullParserException {
143: return read(createReader(in, charSet));
144: }
145:
146: /**
147: * <p>Reads a Document from the given <code>Reader</code></p>
148: *
149: * @param reader is the reader for the input
150: * @return the newly created Document instance
151: * @throws DocumentException if an error occurs during parsing.
152: */
153: public Document read(Reader reader) throws DocumentException,
154: IOException, XmlPullParserException {
155: getXPPParser().setInput(reader);
156: return parseDocument();
157: }
158:
159: /**
160: * <p>Reads a Document from the given array of characters</p>
161: *
162: * @param text is the text to parse
163: * @return the newly created Document instance
164: * @throws DocumentException if an error occurs during parsing.
165: */
166: public Document read(char[] text) throws DocumentException,
167: IOException, XmlPullParserException {
168: getXPPParser().setInput(new CharArrayReader(text));
169: return parseDocument();
170: }
171:
172: /**
173: * <p>Reads a Document from the given stream</p>
174: *
175: * @param in <code>InputStream</code> to read from.
176: * @param systemID is the URI for the input
177: * @return the newly created Document instance
178: * @throws DocumentException if an error occurs during parsing.
179: */
180: public Document read(InputStream in, String systemID)
181: throws DocumentException, IOException,
182: XmlPullParserException {
183: return read(createReader(in), systemID);
184: }
185:
186: /**
187: * <p>Reads a Document from the given <code>Reader</code></p>
188: *
189: * @param reader is the reader for the input
190: * @param systemID is the URI for the input
191: * @return the newly created Document instance
192: * @throws DocumentException if an error occurs during parsing.
193: */
194: public Document read(Reader reader, String systemID)
195: throws DocumentException, IOException,
196: XmlPullParserException {
197: Document document = read(reader);
198: document.setName(systemID);
199: return document;
200: }
201:
202: // Properties
203: //-------------------------------------------------------------------------
204:
205: public MXParser getXPPParser() throws XmlPullParserException {
206: if (xppParser == null) {
207: xppParser = (MXParser) getXPPFactory().newPullParser();
208: }
209: return xppParser;
210: }
211:
212: public XmlPullParserFactory getXPPFactory()
213: throws XmlPullParserException {
214: if (xppFactory == null) {
215: xppFactory = XmlPullParserFactory.newInstance(
216: MXParser.class.getName(), null);
217: }
218: xppFactory.setNamespaceAware(true);
219: return xppFactory;
220: }
221:
222: public void setXPPFactory(XmlPullParserFactory xppFactory) {
223: this .xppFactory = xppFactory;
224: }
225:
226: /**
227: * @return the <code>DocumentFactory</code> used to create document objects
228: */
229: public DocumentFactory getDocumentFactory() {
230: if (factory == null) {
231: factory = DocumentFactory.getInstance();
232: }
233: return factory;
234: }
235:
236: /**
237: * <p>This sets the <code>DocumentFactory</code> used to create new documents.
238: * This method allows the building of custom DOM4J tree objects to be implemented
239: * easily using a custom derivation of {@link DocumentFactory}</p>
240: *
241: * @param factory <code>DocumentFactory</code> used to create DOM4J objects
242: */
243: public void setDocumentFactory(DocumentFactory factory) {
244: this .factory = factory;
245: }
246:
247: /**
248: * Adds the <code>ElementHandler</code> to be called when the
249: * specified path is encounted.
250: *
251: * @param path is the path to be handled
252: * @param handler is the <code>ElementHandler</code> to be called
253: * by the event based processor.
254: */
255: public void addHandler(String path, ElementHandler handler) {
256: getDispatchHandler().addHandler(path, handler);
257: }
258:
259: /**
260: * Removes the <code>ElementHandler</code> from the event based
261: * processor, for the specified path.
262: *
263: * @param path is the path to remove the <code>ElementHandler</code> for.
264: */
265: public void removeHandler(String path) {
266: getDispatchHandler().removeHandler(path);
267: }
268:
269: /**
270: * When multiple <code>ElementHandler</code> instances have been
271: * registered, this will set a default <code>ElementHandler</code>
272: * to be called for any path which does <b>NOT</b> have a handler
273: * registered.
274: *
275: * @param handler is the <code>ElementHandler</code> to be called
276: * by the event based processor.
277: */
278: public void setDefaultHandler(ElementHandler handler) {
279: getDispatchHandler().setDefaultHandler(handler);
280: }
281:
282: /**
283: * Returns the last time a full Document was read or a heartbeat was received. Hearbeats
284: * are represented as whitespaces or \n received while a Document is not being parsed.
285: *
286: * @return the time in milliseconds when the last document or heartbeat was received.
287: */
288: public long getLastActive() {
289: long lastHeartbeat = 0;
290: try {
291: lastHeartbeat = getXPPParser().getLastHeartbeat();
292: } catch (XmlPullParserException e) {
293: }
294: return lastActive > lastHeartbeat ? lastActive : lastHeartbeat;
295: }
296:
297: /*
298: * DANIELE: Add parse document by string
299: */
300: public Document parseDocument(String xml) throws DocumentException {
301: /*
302: // Long way with reuse of DocumentFactory.
303: DocumentFactory df = getDocumentFactory();
304: SAXReader reader = new SAXReader( df );
305: Document document = reader.read( new StringReader( xml );*/
306:
307: // Simple way
308: // TODO Optimize. Do not create a sax reader for each parsing
309: Document document = DocumentHelper.parseText(xml);
310:
311: return document;
312: }
313:
314: // Implementation methods
315: //-------------------------------------------------------------------------
316: public Document parseDocument() throws DocumentException,
317: IOException, XmlPullParserException {
318: DocumentFactory df = getDocumentFactory();
319: Document document = df.createDocument();
320: Element parent = null;
321: XmlPullParser pp = getXPPParser();
322: int count = 0;
323: while (true) {
324: int type = -1;
325: type = pp.nextToken();
326: switch (type) {
327: case XmlPullParser.PROCESSING_INSTRUCTION: {
328: String text = pp.getText();
329: int loc = text.indexOf(" ");
330: if (loc >= 0) {
331: document.addProcessingInstruction(text.substring(0,
332: loc), text.substring(loc + 1));
333: } else {
334: document.addProcessingInstruction(text, "");
335: }
336: break;
337: }
338: case XmlPullParser.COMMENT: {
339: if (parent != null) {
340: parent.addComment(pp.getText());
341: } else {
342: document.addComment(pp.getText());
343: }
344: break;
345: }
346: case XmlPullParser.CDSECT: {
347: String text = pp.getText();
348: if (parent != null) {
349: parent.addCDATA(text);
350: } else {
351: if (text.trim().length() > 0) {
352: throw new DocumentException(
353: "Cannot have text content outside of the root document");
354: }
355: }
356: break;
357:
358: }
359: case XmlPullParser.ENTITY_REF: {
360: String text = pp.getText();
361: if (parent != null) {
362: parent.addText(text);
363: } else {
364: if (text.trim().length() > 0) {
365: throw new DocumentException(
366: "Cannot have an entityref outside of the root document");
367: }
368: }
369: break;
370: }
371: case XmlPullParser.END_DOCUMENT: {
372: return document;
373: }
374: case XmlPullParser.START_TAG: {
375: QName qname = (pp.getPrefix() == null) ? df
376: .createQName(pp.getName(), pp.getNamespace())
377: : df.createQName(pp.getName(), pp.getPrefix(),
378: pp.getNamespace());
379: Element newElement = null;
380: // Do not include the namespace if this is the start tag of a new packet
381: // This avoids including "jabber:client", "jabber:server" or
382: // "jabber:component:accept"
383: if ("jabber:client".equals(qname.getNamespaceURI())
384: || "jabber:server".equals(qname
385: .getNamespaceURI())
386: || "jabber:connectionmanager".equals(qname
387: .getNamespaceURI())
388: || "jabber:component:accept".equals(qname
389: .getNamespaceURI())
390: || "http://jabber.org/protocol/httpbind"
391: .equals(qname.getNamespaceURI())) {
392: newElement = df.createElement(pp.getName());
393: } else {
394: newElement = df.createElement(qname);
395: }
396: int nsStart = pp.getNamespaceCount(pp.getDepth() - 1);
397: int nsEnd = pp.getNamespaceCount(pp.getDepth());
398: for (int i = nsStart; i < nsEnd; i++) {
399: if (pp.getNamespacePrefix(i) != null) {
400: newElement.addNamespace(pp
401: .getNamespacePrefix(i), pp
402: .getNamespaceUri(i));
403: }
404: }
405: for (int i = 0; i < pp.getAttributeCount(); i++) {
406: QName qa = (pp.getAttributePrefix(i) == null) ? df
407: .createQName(pp.getAttributeName(i)) : df
408: .createQName(pp.getAttributeName(i), pp
409: .getAttributePrefix(i), pp
410: .getAttributeNamespace(i));
411: newElement
412: .addAttribute(qa, pp.getAttributeValue(i));
413: }
414: if (parent != null) {
415: parent.add(newElement);
416: } else {
417: document.add(newElement);
418: }
419: parent = newElement;
420: count++;
421: break;
422: }
423: case XmlPullParser.END_TAG: {
424: if (parent != null) {
425: parent = parent.getParent();
426: }
427: count--;
428: if (count < 1) {
429: // Update the last time a Document was received
430: lastActive = System.currentTimeMillis();
431: return document;
432: }
433: break;
434: }
435: case XmlPullParser.TEXT: {
436: String text = pp.getText();
437: if (parent != null) {
438: parent.addText(text);
439: } else {
440: if (text.trim().length() > 0) {
441: throw new DocumentException(
442: "Cannot have text content outside of the root document");
443: }
444: }
445: break;
446: }
447: default: {
448: ;
449: }
450: }
451: }
452: }
453:
454: protected DispatchHandler getDispatchHandler() {
455: if (dispatchHandler == null) {
456: dispatchHandler = new DispatchHandler();
457: }
458: return dispatchHandler;
459: }
460:
461: protected void setDispatchHandler(DispatchHandler dispatchHandler) {
462: this .dispatchHandler = dispatchHandler;
463: }
464:
465: /**
466: * Factory method to create a Reader from the given InputStream.
467: */
468: protected Reader createReader(InputStream in) throws IOException {
469: return new BufferedReader(new InputStreamReader(in));
470: }
471:
472: private Reader createReader(InputStream in, String charSet)
473: throws UnsupportedEncodingException {
474: return new BufferedReader(new InputStreamReader(in, charSet));
475: }
476: }
477:
478: /*
479: * Redistribution and use of this software and associated documentation
480: * ("Software"), with or without modification, are permitted provided
481: * that the following conditions are met:
482: *
483: * 1. Redistributions of source code must retain copyright
484: * statements and notices. Redistributions must also contain a
485: * copy of this document.
486: *
487: * 2. Redistributions in binary form must reproduce the
488: * above copyright notice, this list of conditions and the
489: * following disclaimer in the documentation and/or other
490: * materials provided with the distribution.
491: *
492: * 3. The name "DOM4J" must not be used to endorse or promote
493: * products derived from this Software without prior written
494: * permission of MetaStuff, Ltd. For written permission,
495: * please contact dom4j-info@metastuff.com.
496: *
497: * 4. Products derived from this Software may not be called "DOM4J"
498: * nor may "DOM4J" appear in their names without prior written
499: * permission of MetaStuff, Ltd. DOM4J is a registered
500: * trademark of MetaStuff, Ltd.
501: *
502: * 5. Due credit should be given to the DOM4J Project -
503: * http://www.dom4j.org
504: *
505: * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
506: * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
507: * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
508: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
509: * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
510: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
511: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
512: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
513: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
514: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
515: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
516: * OF THE POSSIBILITY OF SUCH DAMAGE.
517: *
518: * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
519: *
520: * $Id: XMPPPacketReader.java 3190 2005-12-12 15:00:46Z gato $
521: */
|