001: /*
002: $Id: XmlParser.java 4132 2006-10-18 08:24:58Z paulk $
003:
004: Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved.
005:
006: Redistribution and use of this software and associated documentation
007: ("Software"), with or without modification, are permitted provided
008: that the following conditions are met:
009:
010: 1. Redistributions of source code must retain copyright
011: statements and notices. Redistributions must also contain a
012: copy of this document.
013:
014: 2. Redistributions in binary form must reproduce the
015: above copyright notice, this list of conditions and the
016: following disclaimer in the documentation and/or other
017: materials provided with the distribution.
018:
019: 3. The name "groovy" must not be used to endorse or promote
020: products derived from this Software without prior written
021: permission of The Codehaus. For written permission,
022: please contact info@codehaus.org.
023:
024: 4. Products derived from this Software may not be called "groovy"
025: nor may "groovy" appear in their names without prior written
026: permission of The Codehaus. "groovy" is a registered
027: trademark of The Codehaus.
028:
029: 5. Due credit should be given to The Codehaus -
030: http://groovy.codehaus.org/
031:
032: THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS
033: ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
034: NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
035: FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
036: THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
037: INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
038: (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
039: SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
040: HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
041: STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
042: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
043: OF THE POSSIBILITY OF SUCH DAMAGE.
044:
045: */
046: package groovy.util;
047:
048: import groovy.xml.QName;
049: import groovy.xml.FactorySupport;
050:
051: import java.io.File;
052: import java.io.FileInputStream;
053: import java.io.IOException;
054: import java.io.InputStream;
055: import java.io.Reader;
056: import java.io.StringReader;
057: import java.util.ArrayList;
058: import java.util.HashMap;
059: import java.util.List;
060: import java.util.Map;
061:
062: import javax.xml.parsers.ParserConfigurationException;
063: import javax.xml.parsers.SAXParser;
064: import javax.xml.parsers.SAXParserFactory;
065:
066: import org.xml.sax.*;
067:
068: /**
069: * A helper class for parsing XML into a tree of Node instances for
070: * a simple way of processing XML. This parser does not preserve the
071: * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
072: * This parser ignores comments and processing instructions and converts the
073: * XML into a Node for each element in the XML with attributes
074: * and child Nodes and Strings. This simple model is sufficient for
075: * most simple use cases of processing XML.
076: *
077: * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
078: * @version $Revision: 4132 $
079: */
080: public class XmlParser implements ContentHandler {
081:
082: private StringBuffer bodyText = new StringBuffer();
083: private List stack = new ArrayList();
084: private Locator locator;
085: private XMLReader reader;
086: private Node parent;
087: private boolean trimWhitespace = true;
088:
089: public XmlParser() throws ParserConfigurationException,
090: SAXException {
091: this (false, true);
092: }
093:
094: public XmlParser(boolean validating, boolean namespaceAware)
095: throws ParserConfigurationException, SAXException {
096: SAXParserFactory factory = FactorySupport
097: .createSaxParserFactory();
098: factory.setNamespaceAware(namespaceAware);
099: factory.setValidating(validating);
100: reader = factory.newSAXParser().getXMLReader();
101: }
102:
103: public XmlParser(XMLReader reader) {
104: this .reader = reader;
105: }
106:
107: public XmlParser(SAXParser parser) throws SAXException {
108: reader = parser.getXMLReader();
109: }
110:
111: /**
112: * Parses the content of the given file as XML turning it into a tree
113: * of Nodes
114: */
115: public Node parse(File file) throws IOException, SAXException {
116: InputSource input = new InputSource(new FileInputStream(file));
117: input.setSystemId("file://" + file.getAbsolutePath());
118: getXMLReader().parse(input);
119: return parent;
120:
121: }
122:
123: /**
124: * Parse the content of the specified input source into a tree of Nodes.
125: */
126: public Node parse(InputSource input) throws IOException,
127: SAXException {
128: getXMLReader().parse(input);
129: return parent;
130: }
131:
132: /**
133: * Parse the content of the specified input stream into a tree of Nodes.
134: * Note that using this method will not provide the parser with any URI
135: * for which to find DTDs etc
136: */
137: public Node parse(InputStream input) throws IOException,
138: SAXException {
139: InputSource is = new InputSource(input);
140: getXMLReader().parse(is);
141: return parent;
142: }
143:
144: /**
145: * Parse the content of the specified reader into a tree of Nodes.
146: * Note that using this method will not provide the parser with any URI
147: * for which to find DTDs etc
148: */
149: public Node parse(Reader in) throws IOException, SAXException {
150: InputSource is = new InputSource(in);
151: getXMLReader().parse(is);
152: return parent;
153: }
154:
155: /**
156: * Parse the content of the specified URI into a tree of Nodes
157: */
158: public Node parse(String uri) throws IOException, SAXException {
159: InputSource is = new InputSource(uri);
160: getXMLReader().parse(is);
161: return parent;
162: }
163:
164: /**
165: * A helper method to parse the given text as XML
166: *
167: * @param text
168: */
169: public Node parseText(String text) throws IOException, SAXException {
170: return parse(new StringReader(text));
171: }
172:
173: // Delegated XMLReader methods
174: //------------------------------------------------------------------------
175:
176: /* (non-Javadoc)
177: * @see org.xml.sax.XMLReader#getDTDHandler()
178: */
179: public DTDHandler getDTDHandler() {
180: return this .reader.getDTDHandler();
181: }
182:
183: /* (non-Javadoc)
184: * @see org.xml.sax.XMLReader#getEntityResolver()
185: */
186: public EntityResolver getEntityResolver() {
187: return this .reader.getEntityResolver();
188: }
189:
190: /* (non-Javadoc)
191: * @see org.xml.sax.XMLReader#getErrorHandler()
192: */
193: public ErrorHandler getErrorHandler() {
194: return this .reader.getErrorHandler();
195: }
196:
197: /* (non-Javadoc)
198: * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
199: */
200: public boolean getFeature(final String uri)
201: throws SAXNotRecognizedException, SAXNotSupportedException {
202: return this .reader.getFeature(uri);
203: }
204:
205: /* (non-Javadoc)
206: * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
207: */
208: public Object getProperty(final String uri)
209: throws SAXNotRecognizedException, SAXNotSupportedException {
210: return this .reader.getProperty(uri);
211: }
212:
213: /* (non-Javadoc)
214: * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
215: */
216: public void setDTDHandler(final DTDHandler dtdHandler) {
217: this .reader.setDTDHandler(dtdHandler);
218: }
219:
220: /* (non-Javadoc)
221: * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
222: */
223: public void setEntityResolver(final EntityResolver entityResolver) {
224: this .reader.setEntityResolver(entityResolver);
225: }
226:
227: /* (non-Javadoc)
228: * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
229: */
230: public void setErrorHandler(final ErrorHandler errorHandler) {
231: this .reader.setErrorHandler(errorHandler);
232: }
233:
234: /* (non-Javadoc)
235: * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
236: */
237: public void setFeature(final String uri, final boolean value)
238: throws SAXNotRecognizedException, SAXNotSupportedException {
239: this .reader.setFeature(uri, value);
240: }
241:
242: /* (non-Javadoc)
243: * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
244: */
245: public void setProperty(final String uri, final Object value)
246: throws SAXNotRecognizedException, SAXNotSupportedException {
247: this .reader.setProperty(uri, value);
248: }
249:
250: // ContentHandler interface
251: //-------------------------------------------------------------------------
252: public void startDocument() throws SAXException {
253: parent = null;
254: }
255:
256: public void endDocument() throws SAXException {
257: stack.clear();
258: }
259:
260: public void startElement(String namespaceURI, String localName,
261: String qName, Attributes list) throws SAXException {
262: addTextToNode();
263:
264: Object name = getElementName(namespaceURI, localName, qName);
265:
266: int size = list.getLength();
267: Map attributes = new HashMap(size);
268: for (int i = 0; i < size; i++) {
269: Object attributeName = getElementName(list.getURI(i), list
270: .getLocalName(i), list.getQName(i));
271: String value = list.getValue(i);
272: attributes.put(attributeName, value);
273: }
274: parent = new Node(parent, name, attributes, new ArrayList());
275: stack.add(parent);
276: }
277:
278: public void endElement(String namespaceURI, String localName,
279: String qName) throws SAXException {
280: addTextToNode();
281:
282: if (!stack.isEmpty()) {
283: stack.remove(stack.size() - 1);
284: if (!stack.isEmpty()) {
285: parent = (Node) stack.get(stack.size() - 1);
286: }
287: }
288: }
289:
290: public void characters(char buffer[], int start, int length)
291: throws SAXException {
292: bodyText.append(buffer, start, length);
293: }
294:
295: public void startPrefixMapping(String prefix, String namespaceURI)
296: throws SAXException {
297: }
298:
299: public void endPrefixMapping(String prefix) throws SAXException {
300: }
301:
302: public void ignorableWhitespace(char buffer[], int start, int len)
303: throws SAXException {
304: }
305:
306: public void processingInstruction(String target, String data)
307: throws SAXException {
308: }
309:
310: public Locator getDocumentLocator() {
311: return locator;
312: }
313:
314: public void setDocumentLocator(Locator locator) {
315: this .locator = locator;
316: }
317:
318: public void skippedEntity(String name) throws SAXException {
319: }
320:
321: // Implementation methods
322: //-------------------------------------------------------------------------
323: protected XMLReader getXMLReader() {
324: reader.setContentHandler(this );
325: return reader;
326: }
327:
328: protected void addTextToNode() {
329: String text = bodyText.toString();
330: if (trimWhitespace) {
331: text = text.trim();
332: }
333: if (text.length() > 0) {
334: parent.children().add(text);
335: }
336: bodyText = new StringBuffer();
337: }
338:
339: protected Object getElementName(String namespaceURI,
340: String localName, String qName) throws SAXException {
341: String name = localName;
342: if ((name == null) || (name.length() < 1)) {
343: name = qName;
344: }
345: if (namespaceURI == null || namespaceURI.length() <= 0) {
346: return name;
347: } else {
348: return new QName(namespaceURI, name, qName);
349: }
350: }
351: }
|