001: /*
002: * ====================================================================
003: * JAFFA - Java Application Framework For All
004: *
005: * Copyright (C) 2002 JAFFA Development Group
006: *
007: * This library is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU Lesser General Public
009: * License as published by the Free Software Foundation; either
010: * version 2.1 of the License, or (at your option) any later version.
011: *
012: * This library is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015: * Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public
018: * License along with this library; if not, write to the Free Software
019: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020: *
021: * Redistribution and use of this software and associated documentation ("Software"),
022: * with or without modification, are permitted provided that the following conditions are met:
023: * 1. Redistributions of source code must retain copyright statements and notices.
024: * Redistributions must also contain a copy of this document.
025: * 2. Redistributions in binary form must reproduce the above copyright notice,
026: * this list of conditions and the following disclaimer in the documentation
027: * and/or other materials provided with the distribution.
028: * 3. The name "JAFFA" must not be used to endorse or promote products derived from
029: * this Software without prior written permission. For written permission,
030: * please contact mail to: jaffagroup@yahoo.com.
031: * 4. Products derived from this Software may not be called "JAFFA" nor may "JAFFA"
032: * appear in their names without prior written permission.
033: * 5. Due credit should be given to the JAFFA Project (http://jaffa.sourceforge.net).
034: *
035: * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
036: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
037: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
038: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
039: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
040: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
041: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
042: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
043: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
044: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
045: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
046: * SUCH DAMAGE.
047: * ====================================================================
048: */
049:
050: package org.jaffa.util;
051:
052: import org.w3c.dom.Node;
053: import org.w3c.dom.NodeList;
054: import java.util.*;
055: import java.io.*;
056: import java.net.URL;
057:
/** Utility Class for Common XML Manipulation routines.
 */
public class XmlHelper {

    /** The byte sequence that opens a DOCTYPE declaration. Only the first byte is a '&lt;'. */
    private static final byte[] DOCTYPE_START = {'<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E'};

    /** Test whether the input element has a child element.
     * @param node The Element to be tested. May be null.
     * @return true if this element has at least one child element; false otherwise, or if the node is null.
     */
    public static boolean hasChildElements(Node node) {
        if (node == null)
            return false;
        NodeList children = node.getChildNodes();
        if (children == null)
            return false;
        for (int i = 0; i < children.getLength(); i++) {
            if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
                return true;
        }
        return false;
    }

    /** This returns the textual content directly held under this element.
     * The value of every Text and CDATA child node is trimmed individually (leading and trailing
     * whitespace removed) and the trimmed values are concatenated in document order.
     * The call does not recurse into child elements.
     * If no textual value exists for the element, an empty String ("") is returned.
     * @param node The Element for which the Text is to be returned. May be null.
     * @return trimmed text content for the element, or empty string if none.
     */
    public static String getTextTrim(Node node) {
        StringBuffer buf = new StringBuffer();
        if (node == null)
            return buf.toString();
        NodeList children = node.getChildNodes();
        if (children == null)
            return buf.toString();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            short type = child.getNodeType();
            if (type == Node.CDATA_SECTION_NODE || type == Node.TEXT_NODE) {
                String value = child.getNodeValue();
                // Defensive: skip a node that reports no value instead of failing on trim()
                if (value != null)
                    buf.append(value.trim());
            }
        }
        return buf.toString();
    }

    /** This routine will return an InputStream which will strip off the DOCTYPE declaration from the source URL.
     * The stream opened on the URL is closed again if stripping fails; otherwise it is closed when the
     * returned stream is closed.
     * @param in The source URL from which the DOCTYPE will be stripped off.
     * @return The InputStream with no DOCTYPE.
     * @throws IOException if an I/O error occurs.
     * @see #stripDoctypeDeclaration(InputStream)
     */
    public static InputStream stripDoctypeDeclaration(URL in)
    throws IOException {
        InputStream source = in.openStream();
        boolean handedOff = false;
        try {
            InputStream result = stripDoctypeDeclaration(source);
            handedOff = true;
            return result;
        } finally {
            // Only close here on failure; on success the returned stream wraps 'source'
            if (!handedOff) {
                try {
                    source.close();
                } catch (IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
    }

    /** This routine will return an InputStream which will strip off the DOCTYPE declaration from the source InputStream.
     * Example declaration: '&lt;!DOCTYPE blah blah blah&gt;'
     * <br><br>
     * Only the first DOCTYPE declaration found outside an XML comment is removed. Everything before it
     * is buffered in memory; everything after it is streamed from the source. If there is no DOCTYPE
     * declaration, the whole input is buffered and returned unchanged.
     * <br><br>
     * Known limitations: the declaration is assumed to end at the first '&gt;', so a DOCTYPE with an
     * internal subset, or with a '&gt;' inside a quoted literal, is only partially removed. The scan is not
     * restricted to the prolog, so a literal '&lt;!DOCTYPE' inside a CDATA section or processing instruction
     * is treated as a declaration as well.
     * <br><br>
     * This is mostly used on incoming JAXB unmarshalling where there is no way to specify a default
     * entity resolver. Here is an example...
     * <pre>
     *     String name = "testfile.xml";
     *     URL xmlFile;
     *
     *     try {
     *         xmlFile = URLHelper.newExtendedURL(name);
     *     } catch (MalformedURLException e) {
     *         log.fatal("Can't Find Components Definition File. Bad URL - " + name, e);
     *         return null;
     *     }
     *
     *     try {
     *         // create a JAXBContext capable of handling classes generated into the package
     *         JAXBContext jc = JAXBContext.newInstance("org.jaffa.presentation.portlet.component.componentdomain");
     *
     *         // create an Unmarshaller
     *         Unmarshaller u = jc.createUnmarshaller();
     *
     *         // enable validation
     *         u.setValidating(true);
     *
     *         // unmarshal a document into a tree of Java content objects composed of classes from the package.
     *         compList = (Components) u.unmarshal(<b>XmlHelper.stripDoctypeDeclaration</b>(xmlFile));
     *     } catch (JAXBException e) {
     *         log.fatal("XML Formatting Error Reading Components Definition File", e);
     *         return null;
     *     } catch (IOException e) {
     *         log.fatal("Error in Reading Components Definition File", e);
     *         return null;
     *     }
     * </pre>
     * @param in The source InputStream from which the DOCTYPE will be stripped off.
     * @return The InputStream with no DOCTYPE. Closing it closes the source stream.
     * @throws IOException if an I/O error occurs.
     */
    public static InputStream stripDoctypeDeclaration(InputStream in)
    throws IOException {
        // Create a BufferedInputStream for efficiency, since the scan reads one byte at a time
        in = new BufferedInputStream(in);

        // Holds the bytes read off the input until the DOCTYPE declaration is encountered.
        // A growable buffer is required: InputStream.available() is only an estimate of what can be
        // read without blocking and must not be used to size the buffer.
        ByteArrayOutputStream head = new ByteArrayOutputStream();

        // Number of leading bytes of DOCTYPE_START matched so far; those bytes are withheld from 'head'
        int matched = 0;

        int aByte;
        while ((aByte = in.read()) != -1) {
            if (aByte == DOCTYPE_START[matched]) {
                if (++matched == DOCTYPE_START.length) {
                    // We've encountered '<!DOCTYPE': drop it together with everything up to '>'.
                    // In a valid XML document there can be only one DOCTYPE, so stop scanning.
                    skipPastClosingBracket(in);
                    matched = 0;
                    break;
                }
                continue;
            }

            // Mismatch: the withheld prefix was ordinary content after all
            head.write(DOCTYPE_START, 0, matched);

            if (aByte == '<') {
                // This '<' may itself start the declaration, so begin a new match with it
                matched = 1;
                continue;
            }

            head.write(aByte);
            boolean afterMarkupOpen = (matched == 2); // the prefix was exactly '<!'
            matched = 0;

            // Check for the comment marker '<!--', so that a DOCTYPE inside a comment is left alone
            if (afterMarkupOpen && aByte == '-') {
                aByte = in.read();
                if (aByte == -1)
                    break;
                head.write(aByte);
                if (aByte == '-')
                    copyComment(in, head);
            }
        }

        // If the input ended in the middle of a partial match, the withheld bytes are still owed
        head.write(DOCTYPE_START, 0, matched);

        if (head.size() == 0)
            return in;

        // Always chain the remainder of the source. Deciding this through available() would drop the
        // rest of the document whenever the source cannot supply more bytes without blocking.
        return new SequenceInputStream(new ByteArrayInputStream(head.toByteArray()), in);
    }

    /** Consumes bytes up to and including the first '&gt;', or up to the end of the stream. */
    private static void skipPastClosingBracket(InputStream in) throws IOException {
        int aByte;
        while ((aByte = in.read()) != -1) {
            if (aByte == '>')
                return;
        }
    }

    /** Copies the body of an XML comment, up to and including the closing '--&gt;', or up to the end of the stream. */
    private static void copyComment(InputStream in, ByteArrayOutputStream out) throws IOException {
        // Count the run of dashes immediately preceding the current byte, so that a terminator
        // preceded by extra dashes (e.g. '--->') is still recognised
        int dashes = 0;
        int aByte;
        while ((aByte = in.read()) != -1) {
            out.write(aByte);
            if (aByte == '-') {
                dashes++;
            } else {
                if (aByte == '>' && dashes >= 2)
                    return;
                dashes = 0;
            }
        }
    }

}
|