001: /*--
002:
003: Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009:
010: 1. Redistributions of source code must retain the above copyright
011: notice, this list of conditions, and the following disclaimer.
012:
013: 2. Redistributions in binary form must reproduce the above copyright
014: notice, this list of conditions, and the disclaimer that follows
015: these conditions in the documentation and/or other materials
016: provided with the distribution.
017:
018: 3. The name "JDOM" must not be used to endorse or promote products
019: derived from this software without prior written permission. For
020: written permission, please contact <request_AT_jdom_DOT_org>.
021:
022: 4. Products derived from this software may not be called "JDOM", nor
023: may "JDOM" appear in their name, without prior written permission
024: from the JDOM Project Management <request_AT_jdom_DOT_org>.
025:
026: In addition, we request (but do not require) that you include in the
027: end-user documentation provided with the redistribution and/or in the
028: software itself an acknowledgement equivalent to the following:
029: "This product includes software developed by the
030: JDOM Project (http://www.jdom.org/)."
031: Alternatively, the acknowledgment may be graphical using the logos
032: available at http://www.jdom.org/images/logos.
033:
034: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
035: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
036: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
037: DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
038: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
039: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
040: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
041: USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
042: ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
043: OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
044: OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
045: SUCH DAMAGE.
046:
047: This software consists of voluntary contributions made by many
048: individuals on behalf of the JDOM Project and was originally
049: created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
050: Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
051: on the JDOM Project, please see <http://www.jdom.org/>.
052:
053: */
054:
055: package org.apache.cxf.aegis.util.jdom;
056:
057: import java.io.InputStream;
058: import java.io.Reader;
059: import java.util.HashMap;
060: import java.util.Iterator;
061: import java.util.Map;
062:
063: import javax.xml.stream.XMLInputFactory;
064: import javax.xml.stream.XMLStreamConstants;
065: import javax.xml.stream.XMLStreamException;
066: import javax.xml.stream.XMLStreamReader;
067:
068: import org.apache.cxf.staxutils.StaxUtils;
069: import org.jdom.Attribute;
070: import org.jdom.Content;
071: import org.jdom.Document;
072: import org.jdom.Element;
073: import org.jdom.JDOMFactory;
074: import org.jdom.Namespace;
075: import org.jdom.UncheckedJDOMFactory;
076:
077: /**
078: * Builds a JDOM {@link org.jdom.Document org.jdom.Document} using a
079: * {@link javax.xml.stream.XMLStreamReader}.
080: *
081: * @version $Revision: 527617 $, $Date: 2006-02-15 15:21:25 -0500 (Wed, 15 Feb
082: * 2006) $
083: * @author Tatu Saloranta
084: * @author Bradley S. Huffman
085: */
086: public class StaxBuilder {
087:
088: /**
089: * Map that contains conversion from textual attribute types StAX uses, to
090: * int values JDOM uses.
091: */
092: private static final Map<String, Integer> ATTR_TYPES = new HashMap<String, Integer>(
093: 32);
094: static {
095: ATTR_TYPES.put("CDATA", new Integer(Attribute.CDATA_TYPE));
096: ATTR_TYPES.put("cdata", new Integer(Attribute.CDATA_TYPE));
097: ATTR_TYPES.put("ID", new Integer(Attribute.ID_TYPE));
098: ATTR_TYPES.put("id", new Integer(Attribute.ID_TYPE));
099: ATTR_TYPES.put("IDREF", new Integer(Attribute.IDREF_TYPE));
100: ATTR_TYPES.put("idref", new Integer(Attribute.IDREF_TYPE));
101: ATTR_TYPES.put("IDREFS", new Integer(Attribute.IDREFS_TYPE));
102: ATTR_TYPES.put("idrefs", new Integer(Attribute.IDREFS_TYPE));
103: ATTR_TYPES.put("ENTITY", new Integer(Attribute.ENTITY_TYPE));
104: ATTR_TYPES.put("entity", new Integer(Attribute.ENTITY_TYPE));
105: ATTR_TYPES
106: .put("ENTITIES", new Integer(Attribute.ENTITIES_TYPE));
107: ATTR_TYPES
108: .put("entities", new Integer(Attribute.ENTITIES_TYPE));
109: ATTR_TYPES.put("NMTOKEN", new Integer(Attribute.NMTOKEN_TYPE));
110: ATTR_TYPES.put("nmtoken", new Integer(Attribute.NMTOKEN_TYPE));
111: ATTR_TYPES
112: .put("NMTOKENS", new Integer(Attribute.NMTOKENS_TYPE));
113: ATTR_TYPES
114: .put("nmtokens", new Integer(Attribute.NMTOKENS_TYPE));
115: ATTR_TYPES
116: .put("NOTATION", new Integer(Attribute.NOTATION_TYPE));
117: ATTR_TYPES
118: .put("notation", new Integer(Attribute.NOTATION_TYPE));
119: ATTR_TYPES.put("ENUMERATED", new Integer(
120: Attribute.ENUMERATED_TYPE));
121: ATTR_TYPES.put("enumerated", new Integer(
122: Attribute.ENUMERATED_TYPE));
123: }
124:
125: /**
126: * Whether ignorable white space should be ignored, ie not added in the
127: * resulting JDOM tree. If true, it will be ignored; if false, it will be
128: * added in the tree. Default value if false.
129: */
130: protected boolean cfgIgnoreWS;
131:
132: /** The factory for creating new JDOM objects */
133: private JDOMFactory factory;
134:
135: private XMLInputFactory xifactory;
136:
137: private Map additionalNamespaces;
138:
139: /**
140: * Default constructor.
141: */
142: public StaxBuilder() {
143: xifactory = StaxUtils.getXMLInputFactory();
144: }
145:
146: public StaxBuilder(Map namespaces) {
147: xifactory = StaxUtils.getXMLInputFactory();
148: this .additionalNamespaces = namespaces;
149: }
150:
151: public StaxBuilder(XMLInputFactory xifactory) {
152: this .xifactory = xifactory;
153: }
154:
155: public Map getAdditionalNamespaces() {
156: return additionalNamespaces;
157: }
158:
159: public void setAdditionalNamespaces(Map additionalNamespaces) {
160: this .additionalNamespaces = additionalNamespaces;
161: }
162:
163: /*
164: * This sets a custom JDOMFactory for the builder. Use this to build the
165: * tree with your own subclasses of the JDOM classes. @param factory <code>JDOMFactory</code>
166: * to use
167: */
168: public void setFactory(JDOMFactory f) {
169: factory = f;
170: }
171:
172: public void setIgnoreWhitespace(boolean state) {
173: cfgIgnoreWS = state;
174: }
175:
176: /**
177: * Returns the current {@link org.jdom.JDOMFactory} in use, if one has been
178: * previously set with {@link #setFactory}, otherwise null.
179: *
180: * @return the factory builder will use
181: */
182: public JDOMFactory getFactory() {
183: return factory;
184: }
185:
186: /**
187: * This will build a JDOM tree given a StAX stream reader.
188: *
189: * @param r Stream reader from which input is read.
190: * @return <code>Document</code> - JDOM document object.
191: * @throws XMLStreamException If the reader threw such exception (to
192: * indicate a parsing or I/O problem)
193: */
194: public Document build(XMLStreamReader r) throws XMLStreamException {
195: /*
196: * Should we do sanity checking to see that r is positioned at
197: * beginning? Not doing so will allow creating documents from sub-trees,
198: * though?
199: */
200: JDOMFactory f = factory;
201: if (f == null) {
202: f = new UncheckedJDOMFactory();
203: }
204: Document doc = f.document(null);
205: buildTree(f, r, doc);
206: return doc;
207: }
208:
209: public Document build(InputStream is) throws XMLStreamException {
210: return build(xifactory.createXMLStreamReader(is));
211: }
212:
213: public Document build(Reader reader) throws XMLStreamException {
214: return build(xifactory.createXMLStreamReader(reader));
215: }
216:
217: /**
218: * This takes a <code>XMLStreamReader</code> and builds up a JDOM tree.
219: * Recursion has been eliminated by using local stack of open elements; this
220: * improves performance somewhat (classic
221: * recursion-by-iteration-and-explicit stack transformation)
222: *
223: * @param node <code>Code</node> to examine.
224: * @param doc JDOM <code>Document</code> being built.
225: */
226: @SuppressWarnings("fallthrough")
227: private void buildTree(JDOMFactory f, XMLStreamReader r,
228: Document doc) throws XMLStreamException {
229: Element current = null; // At top level
230: int event = r.getEventType();
231:
232: // if we're at the start then we need to do a next
233: if (event == -1) {
234: event = r.next();
235: }
236:
237: while (true) {
238: boolean noadd = false;
239: Content child = null;
240:
241: switch (event) {
242: case XMLStreamConstants.CDATA:
243: child = f.cdata(r.getText());
244: break;
245:
246: case XMLStreamConstants.SPACE:
247: if (cfgIgnoreWS) {
248: noadd = true;
249: break;
250: }
251: // fall through
252:
253: case XMLStreamConstants.CHARACTERS:
254: /*
255: * Small complication: although (ignorable) white space is
256: * allowed in prolog/epilog, and StAX may report such event,
257: * JDOM barfs if trying to add it. Thus, let's just ignore all
258: * textual stuff outside the tree:
259: */
260: if (current == null) {
261: noadd = true;
262: break;
263: }
264: child = f.text(r.getText());
265: break;
266:
267: case XMLStreamConstants.COMMENT:
268: child = f.comment(r.getText());
269: break;
270:
271: case XMLStreamConstants.END_DOCUMENT:
272: return;
273:
274: case XMLStreamConstants.END_ELEMENT:
275: /**
276: * If current.getParentElement() previously returned null and we
277: * get this event again we shouldn't bail out with a
278: * NullPointerException
279: */
280: if (current != null) {
281: current = current.getParentElement();
282: }
283: noadd = true;
284: break;
285:
286: case XMLStreamConstants.ENTITY_DECLARATION:
287: case XMLStreamConstants.NOTATION_DECLARATION:
288: /*
289: * Shouldn't really get these, but maybe some stream readers do
290: * provide the info. If so, better ignore it -- DTD event should
291: * have most/all we need.
292: */
293: noadd = true;
294: break;
295:
296: case XMLStreamConstants.ENTITY_REFERENCE:
297: child = f.entityRef(r.getLocalName());
298: break;
299:
300: case XMLStreamConstants.PROCESSING_INSTRUCTION:
301: child = f.processingInstruction(r.getPITarget(), r
302: .getPIData());
303: break;
304:
305: case XMLStreamConstants.START_ELEMENT: {
306: // Ok, need to add a new element and simulate recursion
307: Element newElem = null;
308: String nsURI = r.getNamespaceURI();
309: String elemPrefix = r.getPrefix(); // needed for special
310: // handling of elem's
311: // namespace
312: String ln = r.getLocalName();
313:
314: if (nsURI == null || nsURI.length() == 0) {
315: if (elemPrefix == null || elemPrefix.length() == 0) {
316: newElem = f.element(ln);
317: } else {
318: /*
319: * Happens when a prefix is bound to the default (empty)
320: * namespace...
321: */
322: newElem = f.element(ln, elemPrefix, "");
323: }
324: } else {
325: newElem = f.element(ln, elemPrefix, nsURI);
326: }
327:
328: /*
329: * Let's add element right away (probably have to do it to bind
330: * attribute namespaces, too)
331: */
332: if (current == null) { // at root
333: doc.setRootElement(newElem);
334: if (additionalNamespaces != null) {
335: for (Iterator iter = additionalNamespaces
336: .keySet().iterator(); iter.hasNext();) {
337: String prefix = (String) iter.next();
338: String uri = (String) additionalNamespaces
339: .get(prefix);
340:
341: newElem.addNamespaceDeclaration(Namespace
342: .getNamespace(prefix, uri));
343: }
344: }
345: } else {
346: f.addContent(current, newElem);
347: }
348:
349: // Any declared namespaces?
350: int i;
351: int len;
352: for (i = 0, len = r.getNamespaceCount(); i < len; ++i) {
353: String prefix = r.getNamespacePrefix(i);
354: Namespace ns = Namespace.getNamespace(prefix, r
355: .getNamespaceURI(i));
356: // JDOM has special handling for element's "own" ns:
357: if (prefix != null && prefix.equals(elemPrefix)) {
358: // already set by when it was constructed...
359: } else {
360: f.addNamespaceDeclaration(newElem, ns);
361: }
362: }
363:
364: // And then the attributes:
365: for (i = 0, len = r.getAttributeCount(); i < len; ++i) {
366: String prefix = r.getAttributePrefix(i);
367: Namespace ns;
368:
369: if (prefix == null || prefix.length() == 0) {
370: // Attribute not in any namespace
371: ns = Namespace.NO_NAMESPACE;
372: } else {
373: ns = newElem.getNamespace(prefix);
374:
375: }
376: Attribute attr = f.attribute(r
377: .getAttributeLocalName(i), r
378: .getAttributeValue(i), resolveAttrType(r
379: .getAttributeType(i)), ns);
380: f.setAttribute(newElem, attr);
381: }
382: // And then 'push' new element...
383: current = newElem;
384:
385: // Already added the element, can continue
386: noadd = true;
387: break;
388: }
389: case XMLStreamConstants.START_DOCUMENT:
390: /*
391: * This should only be received at the beginning of document...
392: * so, should we indicate the problem or not?
393: */
394: /*
395: * For now, let it pass: maybe some (broken) readers pass that
396: * info as first event in beginning of doc?
397: */
398:
399: case XMLStreamConstants.DTD:
400: /*
401: * !!! Note: StAX does not expose enough information about
402: * doctype declaration (specifically, public and system id!);
403: * should (re-)parse information... not yet implemented
404: */
405: // TBI
406: // continue main_loop;
407: // Should never get these, from a stream reader:
408: /*
409: * (commented out entries are just FYI; default catches them
410: * all)
411: */
412:
413: // case XMLStreamConstants.ATTRIBUTE:
414: // case XMLStreamConstants.NAMESPACE:
415: default:
416: /*
417: * throw new XMLStreamException("Unrecognized iterator event
418: * type: " + r.getEventType() + "; should not receive such types
419: * (broken stream reader?)");
420: */
421: break;
422: }
423:
424: if (!noadd && child != null) {
425: if (current == null) {
426: f.addContent(doc, child);
427: } else {
428: f.addContent(current, child);
429: }
430: }
431:
432: if (r.hasNext()) {
433: event = r.next();
434: } else {
435: break;
436: }
437: }
438: }
439:
440: private static int resolveAttrType(String typeStr) {
441: if (typeStr != null && typeStr.length() > 0) {
442: Integer i = ATTR_TYPES.get(typeStr);
443: if (i != null) {
444: return i.intValue();
445: }
446: }
447: return Attribute.UNDECLARED_TYPE;
448: }
449: }
|