001: /*
002: * The contents of this file are subject to the
003: * Mozilla Public License Version 1.1 (the "License");
004: * you may not use this file except in compliance with the License.
005: * You may obtain a copy of the License at http://www.mozilla.org/MPL/
006: *
007: * Software distributed under the License is distributed on an "AS IS"
008: * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.
009: * See the License for the specific language governing rights and
010: * limitations under the License.
011: *
012: * The Initial Developer of the Original Code is Simulacra Media Ltd.
013: * Portions created by Simulacra Media Ltd are Copyright (C) Simulacra Media Ltd, 2004.
014: *
015: * All Rights Reserved.
016: *
017: * Contributor(s):
018: */
019:
020: package org.openharmonise.commons.xml;
021:
022: import org.w3c.dom.*;
023: import org.xml.sax.*;
024: import org.xml.sax.InputSource;
025:
026: import java.io.*;
027: import java.util.*;
028: import java.util.List;
029: import java.util.logging.*;
030: import java.util.logging.Level;
031:
032: import javax.xml.parsers.*;
033: import javax.xml.parsers.DocumentBuilderFactory;
034:
035: /**
036: * Class containing utility methods for processing XML Documents.
037: *
038: * @author Matthew Large
039: * @version $Revision: 1.2 $
040: *
041: */
042:
public class XMLUtils {

    private static final Logger m_logger = Logger
            .getLogger(XMLUtils.class.getName());

    /**
     * Returns the element name for the specified <code>Element</code>, i.e.
     * the local name in the case of a namespaced element and the tag name
     * in the case of a non-namespaced element.
     *
     * @param el the element
     * @return the name of the specified element
     */
    public static String getElementName(Element el) {
        String sElName = el.getLocalName();

        // getLocalName() is null for nodes created without namespace support.
        if (sElName == null) {
            sElName = el.getTagName();
        }

        return sElName;
    }

    /**
     * Returns the string value of the text node directly beneath an element.
     * Only the first child is inspected; if it is not a <code>Text</code>
     * node the result is <code>null</code>.
     *
     * @param el Element from which to get the text value, may be <code>null</code>
     * @return The text value from this element, or <code>null</code> if the
     *         element is <code>null</code> or its first child is not text
     */
    static public String getChildTextValue(Element el) {
        String sVal = null;

        if (el != null) {
            Node node = el.getFirstChild();

            if (node instanceof Text) {
                sVal = node.getNodeValue();
            }
        }

        return sVal;
    }

    /**
     * Method to get the first child matching a given name. Useful as the
     * DOM getElementsByTagName uses the descendent axis rather than the child
     * axis.
     *
     * @param el Element to check for child
     * @param sName Name of child to check for; any prefix is ignored
     * @return First child element matching the given name, or null if not found
     */
    public static Element getFirstNamedChild(Element el, String sName) {
        return (Element) XMLUtils.getFirstNamedChild((Node) el, sName);
    }

    /**
     * Method to get the first child matching a given name. Useful as the
     * DOM getElementsByTagName uses the descendent axis rather than the child
     * axis. Only element children are considered and they are compared by
     * {@link #getElementName(Element)}, so a prefix on <code>sName</code>
     * (everything up to and including the first ':') is discarded first.
     *
     * @param node Node to check for child
     * @param sName Name of child to check for
     * @return First child node matching the given name, or null if not found
     */
    public static Node getFirstNamedChild(Node node, String sName) {
        String sElName = sName;
        int nIndex = sName.indexOf(":");
        if (nIndex > -1) {
            sElName = sName.substring(nIndex + 1);
        }

        NodeList nl = node.getChildNodes();
        for (int i = 0; i < nl.getLength(); i++) {
            Node currNode = nl.item(i);
            if (currNode instanceof Element
                    && getElementName((Element) currNode).equals(sElName)) {
                return currNode;
            }
        }

        return null;
    }

    /**
     * Method to get the first child element, useful as the DOM getFirstChild
     * method is not restricted to Element nodes.
     *
     * @param el Element to get child from
     * @return First child element, or null if not found
     */
    public static Element getFirstElementChild(Element el) {
        NodeList nl = el.getChildNodes();
        for (int i = 0; i < nl.getLength(); i++) {
            Node child = nl.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                return (Element) child;
            }
        }

        return null;
    }

    /**
     * Properly encodes entity references in a XML file in case they have
     * been previously decoded. The input is read line by line, each line is
     * passed through {@link #encodeXMLText(String)} and written, followed by
     * a line separator, to a newly created temporary file. Both files are
     * accessed with the platform default character encoding.
     * <p>
     * This is a best-effort operation: any failure is logged as a warning
     * and whatever has been produced so far is returned.
     *
     * @param file File to fix
     * @return Fixed file, possibly incomplete if an error was logged, or
     *         <code>null</code> if the temporary file could not be created
     */
    public static File fixXMLFileEntities(File file) {
        File newFile = null;
        BufferedWriter wBuff = null;
        BufferedReader rBuff = null;
        try {
            newFile = File.createTempFile("sim-", "tmp");
            wBuff = new BufferedWriter(new FileWriter(newFile));
            rBuff = new BufferedReader(new FileReader(file));

            // readLine() returning null is the only reliable end-of-file
            // signal; ready() merely reports whether a read would block.
            String sLine = rBuff.readLine();
            while (sLine != null) {
                if (sLine.indexOf("&") > -1) {
                    wBuff.write(XMLUtils.encodeXMLText(sLine));
                } else {
                    wBuff.write(sLine);
                }
                // readLine() strips the terminator, so put one back to stop
                // adjacent lines (and the tokens on them) running together.
                wBuff.newLine();
                sLine = rBuff.readLine();
            }
        } catch (Exception e) {
            m_logger.log(Level.WARNING, e.getMessage(), e);
        } finally {
            // Close each stream independently so that a failure (or a stream
            // that was never opened) cannot leak the other one.
            if (wBuff != null) {
                try {
                    wBuff.close();
                } catch (IOException ex) {
                    m_logger.log(Level.WARNING, ex.getMessage(), ex);
                }
            }
            if (rBuff != null) {
                try {
                    rBuff.close();
                } catch (IOException ex) {
                    m_logger.log(Level.WARNING, ex.getMessage(), ex);
                }
            }
        }
        return newFile;
    }

    /**
     * Properly XML encodes the ampersands in a piece of text:
     * <ul>
     * <li>a doubly encoded ampersand (<code>&amp;amp;amp;</code>, or any longer
     * run of repeated <code>amp;</code>) is collapsed to a single
     * <code>&amp;amp;</code>;</li>
     * <li>an existing entity or character reference such as
     * <code>&amp;lt;</code>, <code>&amp;apos;</code> or <code>&amp;#38;</code>
     * is copied unchanged;</li>
     * <li>any other <code>&amp;</code> becomes <code>&amp;amp;</code>.</li>
     * </ul>
     * All other characters are copied through untouched.
     *
     * @param sText Text to encode
     * @return Encoded text
     */
    private static String encodeXMLText(String sText) {
        int nLength = sText.length();
        StringBuffer sNewBuff = new StringBuffer(nLength + 16);

        int i = 0;
        while (i < nLength) {
            char currChar = sText.charAt(i);

            if (currChar != '&') {
                sNewBuff.append(currChar);
                i++;
            } else if (sText.startsWith("&amp;", i)) {
                sNewBuff.append("&amp;");
                i += 5;
                // Swallow the left-overs of repeated encoding ("&amp;amp;").
                while (sText.startsWith("amp;", i)) {
                    i += 4;
                }
            } else {
                int nEnd = findReferenceEnd(sText, i);
                if (nEnd > i) {
                    // Already a reference: keep it exactly as written.
                    sNewBuff.append(sText.substring(i, nEnd + 1));
                    i = nEnd + 1;
                } else {
                    sNewBuff.append("&amp;");
                    i++;
                }
            }
        }

        return sNewBuff.toString();
    }

    /**
     * Locates the ';' that terminates the entity reference
     * (<code>&amp;name;</code>) or character reference
     * (<code>&amp;#nnn;</code> / <code>&amp;#xhhh;</code>) beginning at the
     * given '&amp;'.
     *
     * @param sText Text being scanned
     * @param nStart Index of the '&amp;' character
     * @return Index of the terminating ';', or -1 if the '&amp;' does not
     *         start a reference
     */
    private static int findReferenceEnd(String sText, int nStart) {
        int nLength = sText.length();
        int nPos = nStart + 1;

        if (nPos < nLength && sText.charAt(nPos) == '#') {
            nPos++;
            boolean bHex = nPos < nLength && sText.charAt(nPos) == 'x';
            if (bHex) {
                nPos++;
            }
            int nDigitsStart = nPos;
            while (nPos < nLength
                    && isReferenceDigit(sText.charAt(nPos), bHex)) {
                nPos++;
            }
            if (nPos > nDigitsStart && nPos < nLength
                    && sText.charAt(nPos) == ';') {
                return nPos;
            }
            return -1;
        }

        int nNameStart = nPos;
        while (nPos < nLength
                && isReferenceNameChar(sText.charAt(nPos),
                        nPos == nNameStart)) {
            nPos++;
        }
        if (nPos > nNameStart && nPos < nLength
                && sText.charAt(nPos) == ';') {
            return nPos;
        }
        return -1;
    }

    /**
     * Tells whether a character may appear in the numeric part of a
     * character reference.
     *
     * @param c Character to test
     * @param bHex <code>true</code> to accept hexadecimal digits
     * @return <code>true</code> if the character is an acceptable digit
     */
    private static boolean isReferenceDigit(char c, boolean bHex) {
        if (c >= '0' && c <= '9') {
            return true;
        }
        return bHex && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
    }

    /**
     * Tells whether a character may appear in an entity name.
     *
     * @param c Character to test
     * @param bFirst <code>true</code> if the character would be the first
     *            character of the name
     * @return <code>true</code> if the character is acceptable
     */
    private static boolean isReferenceNameChar(char c, boolean bFirst) {
        if (Character.isLetter(c) || c == '_' || c == ':') {
            return true;
        }
        if (bFirst) {
            return false;
        }
        return Character.isDigit(c) || c == '.' || c == '-';
    }

    /**
     * Method to get the element children matching a given name. Useful as the
     * DOM getElementsByTagName uses the descendent axis rather than the child
     * axis. Children are compared by {@link #getElementName(Element)}; a
     * prefix on <code>sName</code> is discarded first (a name that merely
     * starts with ':' is left as it is).
     *
     * @param propEl Element to check for children
     * @param sName Name of children to check for
     * @return List of the matching child <code>Element</code>s in document
     *         order; empty, never null, if there are none
     */
    public static List getChildrenByName(Element propEl, String sName) {
        List result = new ArrayList();

        String sLocalName = sName;
        int nIndex = sName.indexOf(":");
        if (nIndex > 0) {
            sLocalName = sName.substring(nIndex + 1);
        }

        NodeList nodes = propEl.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            Node tmpNode = nodes.item(i);
            if (tmpNode.getNodeType() == Node.ELEMENT_NODE
                    && getElementName((Element) tmpNode).equals(sLocalName)) {
                result.add(tmpNode);
            }
        }

        return result;
    }

    /**
     * Method to deep copy a node from one document to another. Only element
     * and text nodes are copied: for an element, its attributes and its
     * element/text children are copied recursively and every other kind of
     * child (comments, CDATA sections, processing instructions, ...) is left
     * out. Namespace information on elements and attributes is carried over
     * when the source node has any.
     *
     * @param originalNode Node to copy
     * @param parent_destination Destination document
     * @return Copy of original node, owned by parent_destination Document, or
     *         <code>null</code> if the node is neither an element nor text
     */
    public static Node copyNode(Node originalNode,
            Document parent_destination) {
        short nType = originalNode.getNodeType();

        if (nType == Node.TEXT_NODE) {
            return parent_destination.createTextNode(originalNode
                    .getNodeValue());
        }
        if (nType != Node.ELEMENT_NODE) {
            return null;
        }

        Element elSource = (Element) originalNode;
        String sNamespace = elSource.getNamespaceURI();
        Element elCopy;
        if (sNamespace == null) {
            elCopy = parent_destination.createElement(elSource.getTagName());
        } else {
            // Keep the namespace so that getLocalName() (and therefore
            // getElementName()) gives the same answer on the copy.
            elCopy = parent_destination.createElementNS(sNamespace,
                    elSource.getTagName());
        }

        NamedNodeMap attribs = elSource.getAttributes();
        for (int i = 0; i < attribs.getLength(); i++) {
            Attr nextAtt = (Attr) attribs.item(i);
            String sAttNamespace = nextAtt.getNamespaceURI();
            if (sAttNamespace == null) {
                elCopy.setAttribute(nextAtt.getNodeName(), nextAtt
                        .getValue());
            } else {
                elCopy.setAttributeNS(sAttNamespace, nextAtt.getNodeName(),
                        nextAtt.getValue());
            }
        }

        NodeList nodes = elSource.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            Node child = nodes.item(i);
            short nChildType = child.getNodeType();
            if (nChildType == Node.ELEMENT_NODE
                    || nChildType == Node.TEXT_NODE) {
                elCopy.appendChild(copyNode(child, parent_destination));
            }
        }

        return elCopy;
    }

    /**
     * Copies all children from one element to another which may be owned by a
     * different Document.
     *
     * @param parent_destination Destination element to copy children to
     * @param parent_source Source element to copy children from
     * @param doc_destination Destination document
     */
    public static void copyChildren(Element parent_destination,
            Element parent_source, Document doc_destination) {
        copyChildren(parent_destination, parent_source,
                doc_destination, new Vector());
    }

    /**
     * Copies all children from one element to another which may be owned by a
     * different Document. Children are copied with
     * {@link #copyNode(Node, Document)}, so only element and text children
     * are transferred; anything else is skipped.
     *
     * @param parent_destination Destination element to copy children to
     * @param parent_source Source element to copy children from
     * @param doc_destination Destination document
     * @param ignoreTags Element names (tag names, including any prefix) not
     *            to copy
     */
    public static void copyChildren(Element parent_destination,
            Element parent_source, Document doc_destination,
            Vector ignoreTags) {
        NodeList child_nodes = parent_source.getChildNodes();

        for (int k = 0; k < child_nodes.getLength(); k++) {
            Node node = child_nodes.item(k);

            if (node.getNodeType() == Node.ELEMENT_NODE
                    && ignoreTags.contains(((Element) node).getTagName())) {
                continue;
            }

            // copyNode() yields null for node types it does not copy;
            // handing that to appendChild() would fail.
            Node copy = copyNode(node, doc_destination);
            if (copy != null) {
                parent_destination.appendChild(copy);
            }
        }
    }

    /**
     * Returns a <code>Document</code> from the given <code>String</code>,
     * parsed with a namespace aware parser.
     * <p>
     * NOTE(review): apart from namespace awareness the parser runs with the
     * JAXP defaults, which means DOCTYPE declarations and external entities
     * are processed. Confirm that callers never pass untrusted input, or
     * harden the factory.
     *
     * @param sVal XML document in <code>String</code> form
     * @return the parsed document
     * @throws SAXException if the text is not well-formed XML
     * @throws IOException if the text cannot be read
     * @throws ParserConfigurationException if a parser cannot be created
     */
    public static Document getDocumentFromString(String sVal)
            throws SAXException, IOException,
            ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory
                .newInstance();
        factory.setNamespaceAware(true);

        InputSource isource = new InputSource(new StringReader(sVal));

        return factory.newDocumentBuilder().parse(isource);
    }
}
|