001: package com.mockrunner.util.web;
002:
003: import java.io.StringReader;
004: import java.util.List;
005:
006: import org.apache.xerces.parsers.DOMParser;
007: import org.cyberneko.html.HTMLConfiguration;
008: import org.jdom.Element;
009: import org.jdom.input.DOMBuilder;
010: import org.jdom.output.XMLOutputter;
011: import org.xml.sax.InputSource;
012:
013: import com.mockrunner.base.NestedApplicationException;
014:
015: /**
016: * Util class for HTML and XML parsing.
017: */
018: public class XmlUtil {
019: /**
020: * Convinience method for HTML fragments. Returns the body
021: * as JDOM <code>Element</code>.
022: *
023: * If an HTML documents looks like this:
024: * <pre>
025: * <html>
026: * <head>
027: * </head>
028: * <body>
029: * <h1>
030: * </h1>
031: * </body>
032: * </html>
033: * </pre>
034: *
035: * the method returns the h1 tag as <code>Element</code>.
036: * @param document the <code>org.jdom.Document</code>
037: * @return the body <code>Element</code>
038: */
039: public static Element getBodyFragmentFromJDOMDocument(
040: org.jdom.Document document) {
041: Element element = document.getRootElement().getChild("BODY");
042: if (null == element) {
043: element = document.getRootElement().getChild("body");
044: }
045: if (null != element) {
046: List childs = element.getChildren();
047: if (null != childs && childs.size() > 0)
048: return (Element) childs.get(0);
049: }
050: return null;
051: }
052:
053: /**
054: * @deprecated use {@link #getBodyFragmentFromJDOMDocument}
055: */
056: public static Element getBodyFragmentJDOMDocument(
057: org.jdom.Document document) {
058: return getBodyFragmentFromJDOMDocument(document);
059: }
060:
061: /**
062: * Returns the documents XML content as a string.
063: * @param document the <code>org.jdom.Document</code>
064: * @return the output as string
065: */
066: public static String createStringFromJDOMDocument(
067: org.jdom.Document document) {
068: try {
069: return new XMLOutputter().outputString(document);
070: } catch (Exception exc) {
071: throw new NestedApplicationException(exc);
072: }
073: }
074:
075: /**
076: * Creates a JDOM <code>Document</code> from a specified
077: * W3C <code>Document</code>.
078: * @param document the <code>org.w3c.dom.Document</code>
079: * @return the <code>org.jdom.Document</code>
080: */
081: public static org.jdom.Document createJDOMDocument(
082: org.w3c.dom.Document document) {
083: return new DOMBuilder().build(document);
084: }
085:
086: /**
087: * Returns a parser suitable for parsing HTML documents.
088: * The NekoHTML parser is used with some settings to
089: * preserve case of tag names and disable namespace processing.
090: * This method is used by {@link #parseHTML}.
091: * @return instance of <code>org.apache.xerces.parsers.DOMParser</code>
092: * with Neko configuration
093: */
094: public static DOMParser getHTMLParser() {
095: try {
096: HTMLConfiguration config = new HTMLConfiguration();
097: config.setProperty(
098: "http://cyberneko.org/html/properties/names/elems",
099: "match");
100: config.setProperty(
101: "http://cyberneko.org/html/properties/names/attrs",
102: "no-change");
103: DOMParser parser = new DOMParser(config);
104: return parser;
105: } catch (Exception exc) {
106: throw new NestedApplicationException(exc);
107: }
108: }
109:
110: /**
111: * Parses the specified HTML with the NekoHTML parser.
112: * If you want to use another HTML parser or configure
113: * the NekoHTML parser with special features, you can use
114: * the <code>parse</code> method.
115: * @param source the HTML as String
116: * @return the parsed document as org.w3c.dom.Document
117: */
118: public static org.w3c.dom.Document parseHTML(String source) {
119: try {
120: return parse(getHTMLParser(), source);
121: } catch (Exception exc) {
122: throw new NestedApplicationException(exc);
123: }
124: }
125:
126: /**
127: * Parses the specified XML with the specified parser.
128: * The main purpose of this method is to use the NekoHTML
129: * parser with custom features and properties. If you can live
130: * with the settings provided by Mockrunner, you can use
131: * {@link #parseHTML}.
132: * @param parser the parser (must extend
133: * <code>org.apache.xerces.parsers.DOMParser</code>),
134: * e.g. the one returned by {@link #getHTMLParser}
135: * @param source the XML as String
136: * @return the parsed document as org.w3c.dom.Document
137: */
138: public static org.w3c.dom.Document parse(DOMParser parser,
139: String source) {
140: try {
141: parser.parse(new InputSource(new StringReader(source)));
142: return parser.getDocument();
143: } catch (Exception exc) {
144: throw new NestedApplicationException(exc);
145: }
146: }
147: }
|