001: /*
002: * $Id: ElementParser.java,v 1.42 2007/09/18 11:21:01 agoubard Exp $
003: *
004: * Copyright 2003-2007 Orange Nederland Breedband B.V.
005: * See the COPYRIGHT file for redistribution and use restrictions.
006: */
007: package org.xins.common.xml;
008:
009: import java.io.ByteArrayInputStream;
010: import java.io.IOException;
011: import java.io.InputStream;
012: import java.io.Reader;
013: import java.io.StringReader;
014: import java.util.Stack;
015:
016: import org.xml.sax.Attributes;
017: import org.xml.sax.InputSource;
018: import org.xml.sax.SAXException;
019: import org.xml.sax.helpers.DefaultHandler;
020:
021: import org.xins.common.MandatoryArgumentChecker;
022: import org.xins.common.Utils;
023: import org.xins.common.text.ParseException;
024: import org.xins.common.text.TextUtils;
025:
026: /**
027: * XML element parser. XML is parsed to produce {@link Element} objects.
028: *
029: * <p>Note: This parser is
030: * <a href="http://www.w3.org/TR/REC-xml-names/">XML Namespaces</a>-aware.
031: *
032: * @version $Revision: 1.42 $ $Date: 2007/09/18 11:21:01 $
033: *
034: * @author <a href="mailto:anthony.goubard@japplis.com">Anthony Goubard</a>
035: * @author <a href="mailto:ernst@ernstdehaan.com">Ernst de Haan</a>
036: *
037: * @since XINS 1.1.0
038: */
039: public class ElementParser {
040:
041: /**
042: * Error state for the SAX event handler.
043: */
044: private static final State ERROR = new State("ERROR");
045:
046: /**
047: * State for the SAX event handler in the data section (at any depth within
048: * the <code>data</code> element).
049: */
050: private static final State PARSING = new State("PARSING");
051:
052: /**
053: * State for the SAX event handler for the final state, when parsing is
054: * finished.
055: */
056: private static final State FINISHED = new State("FINISHED");
057:
058: /**
059: * Constructs a new <code>ElementParser</code>.
060: */
061: public ElementParser() {
062:
063: // empty
064: }
065:
066: /**
067: * Parses the specified String to create an XML <code>Element</code> object.
068: *
069: * @param text
070: * the XML text to be parsed, not <code>null</code>.
071: *
072: * @return
073: * the parsed result, not <code>null</code>.
074: *
075: * @throws IllegalArgumentException
076: * if <code>text == null</code>.
077: *
078: * @throws ParseException
079: * if the content of the character stream is not considered to be valid XML.
080: *
081: * @since XINS 2.0.
082: */
083: public Element parse(String text) throws IllegalArgumentException,
084: ParseException {
085:
086: // Check preconditions
087: MandatoryArgumentChecker.check("text", text);
088:
089: try {
090: return parse(new StringReader(text));
091: } catch (IOException ioe) {
092: throw Utils.logProgrammingError(ioe);
093: }
094: }
095:
096: /**
097: * Parses content of a character stream to create an XML
098: * <code>Element</code> object.
099: *
100: * @param in
101: * the byte stream that is supposed to contain XML to be parsed,
102: * not <code>null</code>.
103: *
104: * @return
105: * the parsed result, not <code>null</code>.
106: *
107: * @throws IllegalArgumentException
108: * if <code>in == null</code>.
109: *
110: * @throws IOException
111: * if there is an I/O error.
112: *
113: * @throws ParseException
114: * if the content of the character stream is not considered to be valid
115: * XML.
116: *
117: * @since XINS 2.0.
118: */
119: public Element parse(InputStream in)
120: throws IllegalArgumentException, IOException,
121: ParseException {
122:
123: // Check preconditions
124: MandatoryArgumentChecker.check("in", in);
125:
126: // Wrap the Reader in a SAX InputSource object
127: InputSource source = new InputSource(in);
128:
129: return parse(source);
130: }
131:
132: /**
133: * Parses content of a character stream to create an XML
134: * <code>Element</code> object.
135: *
136: * @param in
137: * the character stream that is supposed to contain XML to be parsed,
138: * not <code>null</code>.
139: *
140: * @return
141: * the parsed result, not <code>null</code>.
142: *
143: * @throws IllegalArgumentException
144: * if <code>in == null</code>.
145: *
146: * @throws IOException
147: * if there is an I/O error.
148: *
149: * @throws ParseException
150: * if the content of the character stream is not considered to be valid
151: * XML.
152: */
153: public Element parse(Reader in) throws IllegalArgumentException,
154: IOException, ParseException {
155:
156: // Check preconditions
157: MandatoryArgumentChecker.check("in", in);
158:
159: // Wrap the Reader in a SAX InputSource object
160: InputSource source = new InputSource(in);
161:
162: return parse(source);
163: }
164:
165: /**
166: * Parses content of a character stream to create an XML
167: * <code>Element</code> object.
168: *
169: * @param source
170: * the input source that is supposed to contain XML to be parsed,
171: * not <code>null</code>.
172: *
173: * @return
174: * the parsed result, not <code>null</code>.
175: *
176: * @throws IOException
177: * if there is an I/O error.
178: *
179: * @throws ParseException
180: * if the content of the character stream is not considered to be valid
181: * XML.
182: */
183: private Element parse(InputSource source) throws IOException,
184: ParseException {
185:
186: // TODO: Consider using an XMLReader instead of a SAXParser
187:
188: // Initialize our SAX event handler
189: Handler handler = new Handler();
190:
191: try {
192: // Let SAX parse the XML, using our handler
193: SAXParserProvider.get().parse(source, handler);
194:
195: } catch (SAXException exception) {
196:
197: // TODO: Log: Parsing failed
198: String exMessage = exception.getMessage();
199:
200: // Construct complete message
201: String message = "Failed to parse XML";
202: if (TextUtils.isEmpty(exMessage)) {
203: message += '.';
204: } else {
205: message += ": " + exMessage;
206: }
207:
208: // Throw exception with message, and register cause exception
209: throw new ParseException(message, exception, exMessage);
210: }
211:
212: Element element = handler.getElement();
213:
214: return element;
215: }
216:
217: /**
218: * SAX event handler that will parse XML.
219: *
220: * @version $Revision: 1.42 $ $Date: 2007/09/18 11:21:01 $
221: * @author <a href="mailto:anthony.goubard@japplis.com">Anthony Goubard</a>
222: * @author <a href="mailto:ernst@ernstdehaan.com">Ernst de Haan</a>
223: */
224: private static class Handler extends DefaultHandler {
225:
226: /**
227: * The current state. Never <code>null</code>.
228: */
229: private State _state;
230:
231: /**
232: * The element resulting of the parsing.
233: */
234: private Element _element;
235:
236: /**
237: * The character content (CDATA or PCDATA) of the element currently
238: * being parsed.
239: */
240: private StringBuffer _characters;
241:
242: /**
243: * The stack of child elements within the data section. The top element
244: * is always <code><data/></code>.
245: */
246: private Stack _dataElementStack;
247:
248: /**
249: * The level for the element pointer within the XML document. Initially
250: * this field is <code>-1</code>, which indicates the current element
251: * pointer is outside the document. The value <code>0</code> is for the
252: * root element (<code>result</code>), etc.
253: */
254: private int _level;
255:
256: /**
257: * Constructs a new <code>Handler</code> instance.
258: */
259: private Handler() {
260:
261: _state = PARSING;
262: _level = -1;
263: _characters = new StringBuffer(145);
264: _dataElementStack = new Stack();
265: }
266:
267: /**
268: * Receive notification of the beginning of an element.
269: *
270: * @param namespaceURI
271: * the namespace URI, can be <code>null</code>.
272: *
273: * @param localName
274: * the local name (without prefix); cannot be <code>null</code>.
275: *
276: * @param qName
277: * the qualified name (with prefix), can be <code>null</code> since
278: * <code>namespaceURI</code> and <code>localName</code> are always
279: * used instead.
280: *
281: * @param atts
282: * the attributes attached to the element; if there are no
283: * attributes, it shall be an empty {@link Attributes} object; cannot
284: * be <code>null</code>.
285: *
286: * @throws IllegalArgumentException
287: * if <code>localName == null || atts == null</code>.
288: *
289: * @throws SAXException
290: * if the parsing failed.
291: */
292: public void startElement(String namespaceURI, String localName,
293: String qName, Attributes atts)
294: throws IllegalArgumentException, SAXException {
295:
296: // Temporarily enter ERROR state, on success this state is left
297: State currentState = _state;
298: _state = ERROR;
299:
300: // Make sure namespaceURI is either null or non-empty
301: namespaceURI = "".equals(namespaceURI) ? null
302: : namespaceURI;
303:
304: // Check preconditions
305: MandatoryArgumentChecker.check("localName", localName,
306: "atts", atts);
307:
308: // Increase the element depth level
309: _level++;
310:
311: if (currentState == ERROR) {
312: String detail = "Unexpected state " + currentState
313: + " (level=" + _level + ')';
314: throw Utils.logProgrammingError(detail);
315:
316: } else {
317:
318: // Find the namespace prefix
319: String prefix = null;
320:
321: if (qName != null && qName.indexOf(':') != -1) {
322: prefix = qName.substring(0, qName.indexOf(':'));
323: }
324:
325: // Construct a Element
326: Element element = new Element(prefix, namespaceURI,
327: localName);
328:
329: // Add all attributes
330: for (int i = 0; i < atts.getLength(); i++) {
331: String attrNamespaceURI = atts.getURI(i);
332: String attrLocalName = atts.getLocalName(i);
333: String attrValue = atts.getValue(i);
334: String attrQName = atts.getQName(i);
335: String attrPrefix = null;
336: if (attrQName != null
337: && attrQName.indexOf(':') != -1) {
338: attrPrefix = attrQName.substring(0, attrQName
339: .indexOf(':'));
340: }
341:
342: element.setAttribute(attrPrefix, attrNamespaceURI,
343: attrLocalName, attrValue);
344: }
345:
346: // Push the element on the stack
347: _dataElementStack.push(element);
348:
349: // Reserve buffer for PCDATA
350: _characters = new StringBuffer(145);
351:
352: // Reset the state from ERROR back to PARSING
353: _state = PARSING;
354: }
355: }
356:
357: /**
358: * Receive notification of the end of an element.
359: *
360: * @param namespaceURI
361: * the namespace URI, can be <code>null</code>.
362: *
363: * @param localName
364: * the local name (without prefix); cannot be <code>null</code>.
365: *
366: * @param qName
367: * the qualified name (with prefix), can be <code>null</code> since
368: * <code>namespaceURI</code> and <code>localName</code> are only
369: * used.
370: *
371: * @throws IllegalArgumentException
372: * if <code>localName == null</code>.
373: */
374: public void endElement(String namespaceURI, String localName,
375: String qName) throws IllegalArgumentException {
376:
377: // Temporarily enter ERROR state, on success this state is left
378: State currentState = _state;
379: _state = ERROR;
380:
381: // Check preconditions
382: MandatoryArgumentChecker.check("localName", localName);
383:
384: if (currentState == ERROR) {
385: String detail = "Unexpected state " + currentState
386: + " (level=" + _level + ')';
387: throw Utils.logProgrammingError(detail);
388:
389: // Within data section
390: } else {
391:
392: // Get the Element for which we process the end tag
393: Element child = (Element) _dataElementStack.pop();
394:
395: // Set the PCDATA content on the element
396: if (_characters.length() > 0) {
397: child.setText(_characters.toString());
398: }
399:
400: // Add the child to the parent
401: if (_dataElementStack.size() > 0) {
402: Element parent = (Element) _dataElementStack.peek();
403: parent.addChild(child);
404:
405: // Reset the state back from ERROR to PARSING
406: _state = PARSING;
407: } else {
408: _element = child;
409: _state = FINISHED;
410: }
411:
412: }
413:
414: _level--;
415: _characters = new StringBuffer(145);
416: }
417:
418: /**
419: * Receive notification of character data.
420: *
421: * @param ch
422: * the <code>char</code> array that contains the characters from the
423: * XML document, cannot be <code>null</code>.
424: *
425: * @param start
426: * the start index within <code>ch</code>.
427: *
428: * @param length
429: * the number of characters to take from <code>ch</code>.
430: *
431: * @throws IndexOutOfBoundsException
432: * if characters outside the allowed range are specified.
433: *
434: * @throws SAXException
435: * if the parsing failed.
436: */
437: public void characters(char[] ch, int start, int length)
438: throws IndexOutOfBoundsException, SAXException {
439:
440: // Temporarily enter ERROR state, on success this state is left
441: State currentState = _state;
442: _state = ERROR;
443:
444: _characters.append(ch, start, length);
445:
446: // Reset _state
447: _state = currentState;
448: }
449:
450: /**
451: * Gets the parsed element.
452: *
453: * @return
454: * the element resulting of the parsing of the XML.
455: */
456: Element getElement() {
457:
458: // Check state
459: if (_state != FINISHED) {
460: String detail = "State is " + _state + " instead of "
461: + FINISHED;
462: throw Utils.logProgrammingError(detail);
463: }
464:
465: return _element;
466: }
467:
468: public InputSource resolveEntity(String publicId,
469: String systemId) {
470: return new InputSource(
471: new ByteArrayInputStream(new byte[0]));
472: }
473: }
474:
475: /**
476: * State of the event handler.
477: *
478: * @version $Revision: 1.42 $ $Date: 2007/09/18 11:21:01 $
479: * @author <a href="mailto:ernst@ernstdehaan.com">Ernst de Haan</a>
480: *
481: * @since XINS 1.0.0
482: */
483: private static final class State {
484:
485: /**
486: * Constructs a new <code>State</code> object.
487: *
488: * @param name
489: * the name of this state, cannot be <code>null</code>.
490: *
491: * @throws IllegalArgumentException
492: * if <code>name == null</code>.
493: */
494: State(String name) throws IllegalArgumentException {
495:
496: // Check preconditions
497: MandatoryArgumentChecker.check("name", name);
498:
499: _name = name;
500: }
501:
502: /**
503: * The name of this state. Cannot be <code>null</code>.
504: */
505: private final String _name;
506:
507: /**
508: * Returns the name of this state.
509: *
510: * @return
511: * the name of this state, cannot be <code>null</code>.
512: */
513: public String getName() {
514: return _name;
515: }
516:
517: /**
518: * Returns a textual representation of this object.
519: *
520: * @return
521: * the name of this state, never <code>null</code>.
522: */
523: public String toString() {
524: return _name;
525: }
526: }
527: }
|