001: /* ====================================================================
002: * The LateralNZ Software License, Version 1.0
003: *
004: * Copyright (c) 2003 LateralNZ. All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions
008: * are met:
009: *
010: * 1. Redistributions of source code must retain the above copyright
011: * notice, this list of conditions and the following disclaimer.
012: *
013: * 2. Redistributions in binary form must reproduce the above copyright
014: * notice, this list of conditions and the following disclaimer in
015: * the documentation and/or other materials provided with the
016: * distribution.
017: *
018: * 3. The end-user documentation included with the redistribution,
019: * if any, must include the following acknowledgment:
020: * "This product includes software developed by
021: * LateralNZ (http://www.lateralnz.org/) and other third parties."
022: * Alternately, this acknowledgment may appear in the software itself,
023: * if and wherever such third-party acknowledgments normally appear.
024: *
025: * 4. The names "LateralNZ" must not be used to endorse or promote
026: * products derived from this software without prior written
027: * permission. For written permission, please
028: * contact oss@lateralnz.org.
029: *
030: * 5. Products derived from this software may not be called "Panther",
031: * or "Lateral" or "LateralNZ", nor may "PANTHER" or "LATERAL" or
032: * "LATERALNZ" appear in their name, without prior written
033: * permission of LateralNZ.
034: *
035: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
036: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
037: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
038: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
039: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
040: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
041: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
042: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
043: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
044: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
045: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
046: * SUCH DAMAGE.
047: * ====================================================================
048: *
049: * This software consists of voluntary contributions made by many
050: * individuals on behalf of LateralNZ. For more
051: * information on Lateral, please see http://www.lateralnz.com/ or
052: * http://www.lateralnz.org
053: *
054: */
055: package org.lateralnz.common.util;
056:
057: import java.io.ByteArrayInputStream;
058: import java.io.IOException;
059: import java.util.LinkedList;
060: import java.util.List;
061: import java.util.StringTokenizer;
062: import javax.xml.parsers.DocumentBuilder;
063: import javax.xml.parsers.DocumentBuilderFactory;
064:
065: import org.w3c.dom.*;
066: import org.xml.sax.SAXException;
067:
068: /**
069: * common XML utility functions
070: *
071: * @author J R Briggs
072: */
073: public final class XMLUtils implements Constants {
074: private static final String IGNORE_CDATA_MATCH_PATTERN = "(.*?)(<!\\[CDATA.*?\\]\\]>)";
075: private static final String CDATA_BEGIN = "<![CDATA";
076: private static final String CR_NL = RETURN + NEWLINE;
077:
078: private static DocumentBuilder dombuilder = null;
079: private static java.util.regex.Pattern cdataPattern = null;
080:
081: static {
082: try {
083: DocumentBuilderFactory dbf = DocumentBuilderFactory
084: .newInstance();
085: dbf.setValidating(false);
086: dombuilder = dbf.newDocumentBuilder();
087: dombuilder.setEntityResolver(null);
088: } catch (Exception e) {
089: e.printStackTrace();
090: }
091: }
092:
093: private XMLUtils() {
094: }
095:
096: /**
097: * flatten an XML node into a string (reverse parse?)
098: */
099: public static final String flatten(Node xml) {
100: StringBuffer sb = new StringBuffer();
101:
102: flatten(xml, sb);
103:
104: return sb.toString();
105: }
106:
107: private static final void flatten(Node xml, StringBuffer sb) {
108: sb.append(LEFT_CHEV).append(xml.getNodeName());
109:
110: // attributes
111: NamedNodeMap nnm = xml.getAttributes();
112: if (nnm != null) {
113: for (int i = 0; i < nnm.getLength(); i++) {
114: Node n = nnm.item(i);
115: sb.append(SPACE).append(n.getNodeName()).append(EQUALS)
116: .append(QUOTE).append(n.getNodeValue()).append(
117: QUOTE);
118: }
119: }
120: sb.append(RIGHT_CHEV);
121:
122: // child nodes
123: NodeList nl = xml.getChildNodes();
124: for (int i = 0; i < nl.getLength(); i++) {
125: if (nl.item(i).getNodeType() == Node.TEXT_NODE) {
126: sb.append(nl.item(i).getNodeValue());
127: } else {
128: flatten(nl.item(i), sb);
129: }
130: }
131: sb.append(LEFT_CHEV).append(FORWARD_SLASH).append(
132: xml.getNodeName()).append(RIGHT_CHEV);
133: }
134:
135: /**
136: * get the value of an xml attribute. For example, given:
137: * <pre>
138: * <xmlnode att1="test1" att2="test2">something</xmlnode>
139: * </pre>
140: * calling getattribute(node, "att2", "blah") should return "test2"
141: */
142: public static final String getAttributeValue(Node xml,
143: String attribute, String def) throws Exception {
144: NamedNodeMap nnm = xml.getAttributes();
145: if (nnm == null) {
146: return def;
147: }
148: Node n = nnm.getNamedItem(attribute);
149: String tmp = null;
150: if (n != null) {
151: tmp = n.getNodeValue();
152: }
153: if (tmp == null) {
154: return def;
155: } else {
156: return tmp;
157: }
158: }
159:
160: /**
161: * get the 'first child' element value of an element
162: */
163: public static final String getFirstChildElementValue(Element e,
164: String elemName) {
165: NodeList nl = e.getElementsByTagName(elemName);
166: if (nl.getLength() < 1) {
167: return EMPTY;
168: }
169: Element e2 = (Element) nl.item(0);
170: return e2.getFirstChild().getNodeValue();
171: }
172:
173: /**
174: * get the first node with a specified name
175: */
176: public static final Node getNamedNode(Node node, String name) {
177: NodeList nl = node.getChildNodes();
178: for (int i = 0; i < nl.getLength(); i++) {
179: Node n = nl.item(i);
180: if (n.getNodeName().equals(name)) {
181: return n;
182: }
183: }
184: return null;
185: }
186:
187: /**
188: * get a list of XML nodes based upon their nodename
189: */
190: public static final List getNodesByName(Node node, String name) {
191: LinkedList rtn = new LinkedList();
192: NodeList nl = node.getChildNodes();
193: for (int i = 0; i < nl.getLength(); i++) {
194: Node n = nl.item(i);
195: if (n.getNodeName().equals(name)) {
196: rtn.add(n);
197: }
198: }
199: return rtn;
200: }
201:
202: /**
203: * return the value of a node
204: */
205: public static final String getNodeValue(Node n) {
206: if (n == null || n.getChildNodes().getLength() < 1) {
207: return EMPTY;
208: } else {
209: Node tmp = n.getChildNodes().item(0);
210: if (tmp == null) {
211: return EMPTY;
212: } else {
213: return tmp.getNodeValue();
214: }
215: }
216: }
217:
218: /**
219: * get the node type as text
220: */
221: public static final String getNodeType(Node n) {
222: switch (n.getNodeType()) {
223: case Node.ATTRIBUTE_NODE:
224: return "attribute";
225: case Node.CDATA_SECTION_NODE:
226: return "cdata section";
227: case Node.COMMENT_NODE:
228: return "comment";
229: case Node.DOCUMENT_FRAGMENT_NODE:
230: return "document fragment";
231: case Node.DOCUMENT_NODE:
232: return "document";
233: case Node.DOCUMENT_TYPE_NODE:
234: return "document type";
235: case Node.ELEMENT_NODE:
236: return "element";
237: case Node.ENTITY_NODE:
238: return "entity";
239: case Node.ENTITY_REFERENCE_NODE:
240: return "entity reference";
241: case Node.NOTATION_NODE:
242: return "notation";
243: case Node.PROCESSING_INSTRUCTION_NODE:
244: return "processing instruction";
245: case Node.TEXT_NODE:
246: return "text";
247: }
248: return EMPTY;
249: }
250:
251: /**
252: * return true if a node contains an attribute
253: */
254: public static final boolean hasAttribute(Node xml, String attribute)
255: throws Exception {
256: NamedNodeMap nnm = xml.getAttributes();
257: if (nnm.getNamedItem(attribute) != null) {
258: return true;
259: } else {
260: return false;
261: }
262: }
263:
264: /**
265: * parse an xml string into a org.w3c.dom.Document object
266: */
267: public static final Document parse(String xml) throws SAXException,
268: IOException {
269: ByteArrayInputStream bais = null;
270: try {
271: if (dombuilder == null) {
272: throw new SAXException("invalid document builder");
273: } else {
274: Document doc;
275: bais = new ByteArrayInputStream(xml.getBytes());
276: doc = dombuilder.parse(bais);
277: return doc;
278: }
279: } finally {
280: IOUtils.close(bais);
281: }
282: }
283:
284: /**
285: * preprocess and xml string, removing all leading and trailing whitespace
286: * from each line, and removing carriage returns and newlines from each line
287: * except where they fall within a CDATA section.
288: */
289: public static final String preprocess(String xml) throws Exception {
290: xml = StringUtils.stripLTSpaces(xml);
291: //PatternMatcherInput input = new PatternMatcherInput(xml);
292: //Pattern p = compiler.compile(IGNORE_CDATA_MATCH_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK);
293: if (cdataPattern == null) {
294: synchronized (XMLUtils.class) {
295: cdataPattern = java.util.regex.Pattern.compile(
296: IGNORE_CDATA_MATCH_PATTERN,
297: java.util.regex.Pattern.CASE_INSENSITIVE
298: | java.util.regex.Pattern.DOTALL);
299: }
300: }
301:
302: // if we have a line which starts with a < but does not end with >
303: // then we need to preserve a space there (so to not cause
304: // parsing problems later. For example:
305: // <mbean code="com.something.Someclass"
306: // name="myname">
307: StringBuffer rtn = new StringBuffer();
308: StringTokenizer st = new StringTokenizer(xml, NEWLINE + RETURN,
309: true);
310: while (st.hasMoreTokens()) {
311: String tok = st.nextToken();
312: rtn.append(tok);
313: int lpos = tok.lastIndexOf(CHAR_LEFT_CHEV);
314: int rpos = tok.lastIndexOf(CHAR_RIGHT_CHEV);
315: if (lpos >= 0 && rpos < lpos) {
316: rtn.append(SPACE);
317: }
318: }
319:
320: xml = rtn.toString();
321: rtn = new StringBuffer();
322: int total = 0;
323:
324: if (xml.indexOf(CDATA_BEGIN) >= 0) {
325: java.util.regex.Matcher matcher = cdataPattern.matcher(xml);
326:
327: // now look for any newlines that aren't within a CDATA section
328: // and remove them
329: while (matcher.find()) {
330: int groups = matcher.groupCount();
331: for (int i = 1; i <= groups; i++) {
332: String tmp = matcher.group(i);
333: total += tmp.length();
334: if (!tmp.startsWith(CDATA_BEGIN)) {
335: tmp = StringUtils.remove(tmp, CR_NL);
336: }
337: rtn.append(tmp);
338: }
339: }
340: }
341:
342: // remove all remaining newlines
343: rtn.append(StringUtils.remove(xml.substring(total), CR_NL));
344: return rtn.toString();
345: }
346:
347: }
|