001: /*
002: * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * 1. Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: * 2. Redistributions in binary form must reproduce the above copyright notice,
010: * this list of conditions and the following disclaimer in the documentation
011: * and/or other materials provided with the distribution.
012: * 3. The end-user documentation included with the redistribution, if any, must
013: * include the following acknowledgment:
014: *
015: * "This product includes software developed by Gargoyle Software Inc.
016: * (http://www.GargoyleSoftware.com/)."
017: *
018: * Alternately, this acknowledgment may appear in the software itself, if
019: * and wherever such third-party acknowledgments normally appear.
020: * 4. The name "Gargoyle Software" must not be used to endorse or promote
021: * products derived from this software without prior written permission.
022: * For written permission, please contact info@GargoyleSoftware.com.
023: * 5. Products derived from this software may not be called "HtmlUnit", nor may
024: * "HtmlUnit" appear in their name, without prior written permission of
025: * Gargoyle Software Inc.
026: *
027: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
028: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
029: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
030: * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
031: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
032: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
033: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
034: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
035: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
036: * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
037: */
038: package com.gargoylesoftware.htmlunit.xml;
039:
040: import java.io.IOException;
041: import java.io.StringReader;
042: import java.util.HashMap;
043: import java.util.Map;
044:
045: import javax.xml.parsers.DocumentBuilder;
046: import javax.xml.parsers.DocumentBuilderFactory;
047: import javax.xml.parsers.ParserConfigurationException;
048:
049: import org.apache.commons.logging.Log;
050: import org.apache.commons.logging.LogFactory;
051: import org.w3c.dom.Document;
052: import org.w3c.dom.NamedNodeMap;
053: import org.w3c.dom.Node;
054: import org.w3c.dom.NodeList;
055: import org.xml.sax.ErrorHandler;
056: import org.xml.sax.InputSource;
057: import org.xml.sax.SAXException;
058: import org.xml.sax.SAXParseException;
059:
060: import com.gargoylesoftware.htmlunit.Page;
061: import com.gargoylesoftware.htmlunit.WebResponse;
062: import com.gargoylesoftware.htmlunit.html.DomCData;
063: import com.gargoylesoftware.htmlunit.html.DomComment;
064: import com.gargoylesoftware.htmlunit.html.DomNode;
065: import com.gargoylesoftware.htmlunit.html.DomText;
066: import com.gargoylesoftware.htmlunit.html.HtmlElement;
067: import com.gargoylesoftware.htmlunit.html.HtmlPage;
068:
069: /**
070: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
071: *
072: * Provides utility methods for working with XML responses.
073: *
074: * @version $Revision: 2132 $
075: * @author Marc Guillemot
076: * @author Ahmed Ashour
077: */
078: public final class XmlUtil {
079: private static final ErrorHandler DISCARD_MESSAGES_HANDLER = new ErrorHandler() {
080: /**
081: * Does nothing as we're not interested in.
082: * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
083: */
084: public void error(final SAXParseException exception)
085: throws SAXException {
086: // Does nothing as we're not interested in.
087: }
088:
089: /**
090: * Does nothing as we're not interested in.
091: * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
092: */
093: public void fatalError(final SAXParseException exception)
094: throws SAXException {
095:
096: // Does nothing as we're not interested in.
097: }
098:
099: /**
100: * Does nothing as we're not interested in.
101: * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
102: */
103: public void warning(final SAXParseException exception)
104: throws SAXException {
105:
106: // Does nothing as we're not interested in.
107: }
108: };
109:
110: /**
111: * Utility class, hide constructor
112: */
113: private XmlUtil() {
114: // nothing
115: }
116:
117: /**
118: * Builds a document from the content of the webresponse.
119: * A warning is logged if an exception is thrown while parsing the xml content
120: * (for instance when the content is not a valid xml and can't be parsed).
121: *
122: * @param webResponse The response from the server
123: * @throws IOException If the page could not be created
124: * @return the parse result
125: * @throws SAXException if the parsing fails
126: * @throws ParserConfigurationException if a DocumentBuilder cannot be created
127: */
128: public static Document buildDocument(final WebResponse webResponse)
129: throws IOException, SAXException,
130: ParserConfigurationException {
131:
132: final DocumentBuilderFactory factory = DocumentBuilderFactory
133: .newInstance();
134: factory.setNamespaceAware(true);
135: final InputSource source = new InputSource(new StringReader(
136: webResponse.getContentAsString()));
137: final DocumentBuilder builder = factory.newDocumentBuilder();
138: builder.setErrorHandler(DISCARD_MESSAGES_HANDLER);
139: return builder.parse(source);
140: }
141:
142: /**
143: * Return the log object for this web client
144: * @return The log object
145: */
146: protected static Log getLog() {
147: return LogFactory.getLog(XmlUtil.class);
148: }
149:
150: /**
151: * Recursively appends a {@link Node} child to {@link DomNode} parent.
152: *
153: * @param page the owner page of {@link XmlElement}s to be created.
154: * @param parent the parent DomNode.
155: * @param child the child Node.
156: */
157: public static void appendChild(final Page page,
158: final DomNode parent, final Node child) {
159: final DomNode childXml = createFrom(page, child);
160: parent.appendDomChild(childXml);
161: copy(page, child, childXml);
162: }
163:
164: private static DomNode createFrom(final Page page,
165: final org.w3c.dom.Node source) {
166: if (source.getNodeType() == Node.TEXT_NODE) {
167: return new DomText(page, source.getNodeValue());
168: }
169: final Map attributes/* String, XmlAttr*/= new HashMap();
170: final NamedNodeMap nodeAttributes = source.getAttributes();
171: for (int i = 0; i < nodeAttributes.getLength(); i++) {
172: final Node attribute = nodeAttributes.item(i);
173: final String qualifiedName;
174: if (attribute.getPrefix() != null) {
175: qualifiedName = attribute.getPrefix() + ':'
176: + attribute.getLocalName();
177: } else {
178: qualifiedName = attribute.getLocalName();
179: }
180: final XmlAttr xmlAttribute = new XmlAttr(page, attribute
181: .getNamespaceURI(), qualifiedName, attribute
182: .getNodeValue());
183: attributes.put(attribute.getNodeName(), xmlAttribute);
184: }
185: String localName = source.getLocalName();
186: if (page instanceof HtmlPage) {
187: localName = localName.toUpperCase();
188: }
189: final String qualifiedName;
190: if (source.getPrefix() == null) {
191: qualifiedName = localName;
192: } else {
193: qualifiedName = source.getPrefix() + ':' + localName;
194: }
195: return new XmlElement(source.getNamespaceURI(), qualifiedName,
196: page, attributes);
197: }
198:
199: /**
200: * Copy all children from 'source' to 'dest'
201: * @param source The Node to copy from.
202: * @param dest The DomNode to copy to.
203: */
204: private static void copy(final Page page,
205: final org.w3c.dom.Node source, final DomNode dest) {
206: final NodeList nodeChildren = source.getChildNodes();
207: for (int i = 0; i < nodeChildren.getLength(); i++) {
208: final Node child = nodeChildren.item(i);
209: switch (child.getNodeType()) {
210: case Node.ELEMENT_NODE:
211: final DomNode childXml = createFrom(page, child);
212: dest.appendDomChild(childXml);
213: copy(page, child, childXml);
214: break;
215:
216: case Node.TEXT_NODE:
217: final DomText text = new DomText(page, child
218: .getNodeValue());
219: dest.appendDomChild(text);
220: break;
221:
222: case Node.CDATA_SECTION_NODE:
223: final DomCData cdata = new DomCData(page, child
224: .getNodeValue());
225: dest.appendDomChild(cdata);
226: break;
227:
228: case Node.COMMENT_NODE:
229: final DomComment comment = new DomComment(page, child
230: .getNodeValue());
231: dest.appendDomChild(comment);
232: break;
233:
234: default:
235: getLog().warn(
236: "NodeType " + child.getNodeType() + " ("
237: + child.getNodeName()
238: + ") is not yet supported.");
239: }
240: }
241: }
242:
243: /**
244: * Search for the namespace URI of the given prefix, starting from the specified element.
245: * @param element The element to start searching from.
246: * @param prefix The namespace prefix.
247: * @return the namespace URI bound to the prefix; or null if there is no such namespace.
248: * @see #lookupNamespaceURI(HtmlElement, String)
249: */
250: public static String lookupNamespaceURI(final XmlElement element,
251: final String prefix) {
252: String uri = element.getAttributeValue("xmlns:" + prefix);
253: if (uri == XmlElement.ATTRIBUTE_NOT_DEFINED) {
254: final DomNode parentNode = element.getParentDomNode();
255: if (parentNode instanceof XmlElement) {
256: uri = lookupNamespaceURI((XmlElement) parentNode,
257: prefix);
258: }
259: }
260: return uri;
261: }
262:
263: /**
264: * Search for the namespace URI of the given prefix, starting from the specified element.
265: * @param element The element to start searching from.
266: * @param prefix The namespace prefix.
267: * @return the namespace URI bound to the prefix; or null if there is no such namespace.
268: * @see #lookupNamespaceURI(XmlElement, String)
269: */
270: public static String lookupNamespaceURI(final HtmlElement element,
271: final String prefix) {
272: String uri = element.getAttributeValue("xmlns:" + prefix);
273: if (uri == HtmlElement.ATTRIBUTE_NOT_DEFINED) {
274: final DomNode parentNode = element.getParentDomNode();
275: if (parentNode instanceof HtmlElement) {
276: uri = lookupNamespaceURI((HtmlElement) parentNode,
277: prefix);
278: }
279: }
280: return uri;
281: }
282: }
|