001: // ========================================================================
002: // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
003: // ------------------------------------------------------------------------
004: // Licensed under the Apache License, Version 2.0 (the "License");
005: // you may not use this file except in compliance with the License.
006: // You may obtain a copy of the License at
007: // http://www.apache.org/licenses/LICENSE-2.0
008: // Unless required by applicable law or agreed to in writing, software
009: // distributed under the License is distributed on an "AS IS" BASIS,
010: // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
011: // See the License for the specific language governing permissions and
012: // limitations under the License.
013: // ========================================================================
014:
015: package org.mortbay.xml;
016:
017: import java.io.File;
018: import java.io.IOException;
019: import java.io.InputStream;
020: import java.net.URL;
021: import java.util.AbstractList;
022: import java.util.ArrayList;
023: import java.util.HashMap;
024: import java.util.Iterator;
025: import java.util.Map;
026: import java.util.NoSuchElementException;
027: import java.util.Stack;
028: import java.util.StringTokenizer;
029:
030: import javax.xml.parsers.SAXParser;
031: import javax.xml.parsers.SAXParserFactory;
032:
033: import org.mortbay.log.Log;
034: import org.mortbay.util.LazyList;
035: import org.xml.sax.Attributes;
036: import org.xml.sax.ContentHandler;
037: import org.xml.sax.InputSource;
038: import org.xml.sax.SAXException;
039: import org.xml.sax.SAXParseException;
040: import org.xml.sax.XMLReader;
041: import org.xml.sax.helpers.DefaultHandler;
042:
043: /*--------------------------------------------------------------*/
044: /**
 * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
 * entity handlers and a mini dom-like document tree.
047: * <P>
048: * By default, the parser is created as a validating parser only if xerces is present. This can be
049: * configured by setting the "org.mortbay.xml.XmlParser.Validating" system property.
050: *
051: * @author Greg Wilkins (gregw)
052: */
053: public class XmlParser {
/** Entity redirections: public id, system id or bare DTD file name mapped to a replacement URL. */
private Map _redirectMap = new HashMap();
/** Underlying JAXP parser; created (and replaced) by setValidating(boolean). */
private SAXParser _parser;
/** Trigger tag name mapped to its ContentHandler; created lazily by addContentHandler. */
private Map _observerMap;
/** One entry per element handled during a parse: the observer triggered by that element, or null. */
private Stack _observers = new Stack();
/** The xpath expression most recently passed to setXpath. */
private String _xpath;
/** LazyList holding the individual paths tokenized from the xpath expression. */
private Object _xpaths;
/** System id of the last resolved entity whose name ends in ".dtd"; reset at the start of a parse. */
private String _dtd;
061:
062: /* ------------------------------------------------------------ */
063: /**
064: * Construct
065: */
066: public XmlParser() {
067: SAXParserFactory factory = SAXParserFactory.newInstance();
068: boolean validating_dft = factory.getClass().toString()
069: .startsWith("org.apache.xerces.");
070: String validating_prop = System.getProperty(
071: "org.mortbay.xml.XmlParser.Validating",
072: validating_dft ? "true" : "false");
073: boolean notValidating = Boolean
074: .getBoolean("org.mortbay.xml.XmlParser.NotValidating"); // deprecated!
075: boolean validating = !notValidating
076: && Boolean.valueOf(validating_prop).booleanValue();
077:
078: setValidating(validating);
079: }
080:
081: /* ------------------------------------------------------------ */
/**
 * Construct a parser with an explicit validation mode.
 *
 * @param validating true to create a validating parser, false for a non-validating one.
 */
public XmlParser(boolean validating) {
    this.setValidating(validating);
}
088:
089: /* ------------------------------------------------------------ */
090: public void setValidating(boolean validating) {
091: try {
092: SAXParserFactory factory = SAXParserFactory.newInstance();
093: factory.setValidating(validating);
094: _parser = factory.newSAXParser();
095:
096: try {
097: if (validating)
098: _parser
099: .getXMLReader()
100: .setFeature(
101: "http://apache.org/xml/features/validation/schema",
102: validating);
103: } catch (Exception e) {
104: if (validating)
105: Log.warn(
106: "Schema validation may not be supported: ",
107: e);
108: else
109: Log.ignore(e);
110: }
111:
112: _parser.getXMLReader().setFeature(
113: "http://xml.org/sax/features/validation",
114: validating);
115: _parser.getXMLReader().setFeature(
116: "http://xml.org/sax/features/namespaces",
117: validating);
118: _parser.getXMLReader().setFeature(
119: "http://xml.org/sax/features/namespace-prefixes",
120: validating);
121: } catch (Exception e) {
122: Log.warn(Log.EXCEPTION, e);
123: throw new Error(e.toString());
124: }
125: }
126:
127: /* ------------------------------------------------------------ */
128: /**
129: * @param name
130: * @param entity
131: */
132: public synchronized void redirectEntity(String name, URL entity) {
133: if (entity != null)
134: _redirectMap.put(name, entity);
135: }
136:
137: /* ------------------------------------------------------------ */
138: /**
139: *
140: * @return Returns the xpath.
141: */
142: public String getXpath() {
143: return _xpath;
144: }
145:
146: /* ------------------------------------------------------------ */
147: /**
148: * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
149: * only path like "/node1/nodeA | /node1/nodeB" are supported.
150: *
151: * @param xpath The xpath to set.
152: */
153: public void setXpath(String xpath) {
154: _xpath = xpath;
155: StringTokenizer tok = new StringTokenizer(xpath, "| ");
156: while (tok.hasMoreTokens())
157: _xpaths = LazyList.add(_xpaths, tok.nextToken());
158: }
159:
160: /* ------------------------------------------------------------ */
161: public String getDTD() {
162: return _dtd;
163: }
164:
165: /* ------------------------------------------------------------ */
166: /**
167: * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
168: * events are passed to the ContentHandler provided from a matching start element to the
169: * corresponding end element. Only a single _content handler can be registered against each tag.
170: *
171: * @param trigger Tag local or q name.
172: * @param observer SAX ContentHandler
173: */
174: public synchronized void addContentHandler(String trigger,
175: ContentHandler observer) {
176: if (_observerMap == null)
177: _observerMap = new HashMap();
178: _observerMap.put(trigger, observer);
179: }
180:
181: /* ------------------------------------------------------------ */
182: public synchronized Node parse(InputSource source)
183: throws IOException, SAXException {
184: _dtd = null;
185: Handler handler = new Handler();
186: XMLReader reader = _parser.getXMLReader();
187: reader.setContentHandler(handler);
188: reader.setErrorHandler(handler);
189: reader.setEntityResolver(handler);
190: if (Log.isDebugEnabled())
191: Log.debug("parsing: sid=" + source.getSystemId() + ",pid="
192: + source.getPublicId());
193: _parser.parse(source, handler);
194: if (handler._error != null)
195: throw handler._error;
196: Node doc = (Node) handler._top.get(0);
197: handler.clear();
198: return doc;
199: }
200:
201: /* ------------------------------------------------------------ */
202: /**
203: * Parse String URL.
204: */
205: public synchronized Node parse(String url) throws IOException,
206: SAXException {
207: if (Log.isDebugEnabled())
208: Log.debug("parse: " + url);
209: return parse(new InputSource(url));
210: }
211:
212: /* ------------------------------------------------------------ */
213: /**
214: * Parse File.
215: */
216: public synchronized Node parse(File file) throws IOException,
217: SAXException {
218: if (Log.isDebugEnabled())
219: Log.debug("parse: " + file);
220: return parse(new InputSource(file.toURL().toString()));
221: }
222:
223: /* ------------------------------------------------------------ */
224: /**
225: * Parse InputStream.
226: */
227: public synchronized Node parse(InputStream in) throws IOException,
228: SAXException {
229: _dtd = null;
230: Handler handler = new Handler();
231: XMLReader reader = _parser.getXMLReader();
232: reader.setContentHandler(handler);
233: reader.setErrorHandler(handler);
234: reader.setEntityResolver(handler);
235: _parser.parse(new InputSource(in), handler);
236: if (handler._error != null)
237: throw handler._error;
238: Node doc = (Node) handler._top.get(0);
239: handler.clear();
240: return doc;
241: }
242:
243: /* ------------------------------------------------------------ */
244: /* ------------------------------------------------------------ */
245: private class NoopHandler extends DefaultHandler {
246: Handler _next;
247: int _depth;
248:
249: NoopHandler(Handler next) {
250: this ._next = next;
251: }
252:
253: /* ------------------------------------------------------------ */
254: public void startElement(String uri, String localName,
255: String qName, Attributes attrs) throws SAXException {
256: _depth++;
257: }
258:
259: /* ------------------------------------------------------------ */
260: public void endElement(String uri, String localName,
261: String qName) throws SAXException {
262: if (_depth == 0)
263: _parser.getXMLReader().setContentHandler(_next);
264: else
265: _depth--;
266: }
267: }
268:
269: /* ------------------------------------------------------------ */
270: /* ------------------------------------------------------------ */
271: private class Handler extends DefaultHandler {
272: Node _top = new Node(null, null, null);
273: SAXParseException _error;
274: private Node _context = _top;
275: private NoopHandler _noop;
276:
277: Handler() {
278: _noop = new NoopHandler(this );
279: }
280:
281: /* ------------------------------------------------------------ */
282: void clear() {
283: _top = null;
284: _error = null;
285: _context = null;
286: }
287:
288: /* ------------------------------------------------------------ */
289: public void startElement(String uri, String localName,
290: String qName, Attributes attrs) throws SAXException {
291: String name = (uri == null || uri.equals("")) ? qName
292: : localName;
293: Node node = new Node(_context, name, attrs);
294:
295: // check if the node matches any xpaths set?
296: if (_xpaths != null) {
297: String path = node.getPath();
298: boolean match = false;
299: for (int i = LazyList.size(_xpaths); !match && i-- > 0;) {
300: String xpath = (String) LazyList.get(_xpaths, i);
301:
302: match = path.equals(xpath)
303: || xpath.startsWith(path)
304: && xpath.length() > path.length()
305: && xpath.charAt(path.length()) == '/';
306: }
307:
308: if (match) {
309: _context.add(node);
310: _context = node;
311: } else {
312: _parser.getXMLReader().setContentHandler(_noop);
313: }
314: } else {
315: _context.add(node);
316: _context = node;
317: }
318:
319: ContentHandler observer = null;
320: if (_observerMap != null)
321: observer = (ContentHandler) _observerMap.get(name);
322: _observers.push(observer);
323:
324: for (int i = 0; i < _observers.size(); i++)
325: if (_observers.get(i) != null)
326: ((ContentHandler) _observers.get(i)).startElement(
327: uri, localName, qName, attrs);
328: }
329:
330: /* ------------------------------------------------------------ */
331: public void endElement(String uri, String localName,
332: String qName) throws SAXException {
333: _context = _context._parent;
334: for (int i = 0; i < _observers.size(); i++)
335: if (_observers.get(i) != null)
336: ((ContentHandler) _observers.get(i)).endElement(
337: uri, localName, qName);
338: _observers.pop();
339: }
340:
341: /* ------------------------------------------------------------ */
342: public void ignorableWhitespace(char buf[], int offset, int len)
343: throws SAXException {
344: for (int i = 0; i < _observers.size(); i++)
345: if (_observers.get(i) != null)
346: ((ContentHandler) _observers.get(i))
347: .ignorableWhitespace(buf, offset, len);
348: }
349:
350: /* ------------------------------------------------------------ */
351: public void characters(char buf[], int offset, int len)
352: throws SAXException {
353: _context.add(new String(buf, offset, len));
354: for (int i = 0; i < _observers.size(); i++)
355: if (_observers.get(i) != null)
356: ((ContentHandler) _observers.get(i)).characters(
357: buf, offset, len);
358: }
359:
360: /* ------------------------------------------------------------ */
361: public void warning(SAXParseException ex) {
362: Log.debug(Log.EXCEPTION, ex);
363: Log.warn("WARNING@" + getLocationString(ex) + " : "
364: + ex.toString());
365: }
366:
367: /* ------------------------------------------------------------ */
368: public void error(SAXParseException ex) throws SAXException {
369: // Save error and continue to report other errors
370: if (_error == null)
371: _error = ex;
372: Log.debug(Log.EXCEPTION, ex);
373: Log.warn("ERROR@" + getLocationString(ex) + " : "
374: + ex.toString());
375: }
376:
377: /* ------------------------------------------------------------ */
378: public void fatalError(SAXParseException ex)
379: throws SAXException {
380: _error = ex;
381: Log.debug(Log.EXCEPTION, ex);
382: Log.warn("FATAL@" + getLocationString(ex) + " : "
383: + ex.toString());
384: throw ex;
385: }
386:
387: /* ------------------------------------------------------------ */
388: private String getLocationString(SAXParseException ex) {
389: return ex.getSystemId() + " line:" + ex.getLineNumber()
390: + " col:" + ex.getColumnNumber();
391: }
392:
393: /* ------------------------------------------------------------ */
394: public InputSource resolveEntity(String pid, String sid) {
395: if (Log.isDebugEnabled())
396: Log.debug("resolveEntity(" + pid + ", " + sid + ")");
397:
398: if (sid != null && sid.endsWith(".dtd"))
399: _dtd = sid;
400:
401: URL entity = null;
402: if (pid != null)
403: entity = (URL) _redirectMap.get(pid);
404: if (entity == null)
405: entity = (URL) _redirectMap.get(sid);
406: if (entity == null) {
407: String dtd = sid;
408: if (dtd.lastIndexOf('/') >= 0)
409: dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
410:
411: if (Log.isDebugEnabled())
412: Log
413: .debug("Can't exact match entity in redirect map, trying "
414: + dtd);
415: entity = (URL) _redirectMap.get(dtd);
416: }
417:
418: if (entity != null) {
419: try {
420: InputStream in = entity.openStream();
421: if (Log.isDebugEnabled())
422: Log.debug("Redirected entity " + sid + " --> "
423: + entity);
424: InputSource is = new InputSource(in);
425: is.setSystemId(sid);
426: return is;
427: } catch (IOException e) {
428: Log.ignore(e);
429: }
430: }
431: return null;
432: }
433: }
434:
435: /* ------------------------------------------------------------ */
436: /* ------------------------------------------------------------ */
437: /**
438: * XML Attribute.
439: */
440: public static class Attribute {
441: private String _name;
442: private String _value;
443:
444: Attribute(String n, String v) {
445: _name = n;
446: _value = v;
447: }
448:
449: public String getName() {
450: return _name;
451: }
452:
453: public String getValue() {
454: return _value;
455: }
456: }
457:
458: /* ------------------------------------------------------------ */
459: /* ------------------------------------------------------------ */
460: /**
461: * XML Node. Represents an XML element with optional attributes and ordered content.
462: */
463: public static class Node extends AbstractList {
464: Node _parent;
465: private ArrayList _list;
466: private String _tag;
467: private Attribute[] _attrs;
468: private boolean _lastString = false;
469: private String _path;
470:
471: /* ------------------------------------------------------------ */
472: Node(Node parent, String tag, Attributes attrs) {
473: _parent = parent;
474: _tag = tag;
475:
476: if (attrs != null) {
477: _attrs = new Attribute[attrs.getLength()];
478: for (int i = 0; i < attrs.getLength(); i++) {
479: String name = attrs.getLocalName(i);
480: if (name == null || name.equals(""))
481: name = attrs.getQName(i);
482: _attrs[i] = new Attribute(name, attrs.getValue(i));
483: }
484: }
485: }
486:
487: /* ------------------------------------------------------------ */
488: public Node getParent() {
489: return _parent;
490: }
491:
492: /* ------------------------------------------------------------ */
493: public String getTag() {
494: return _tag;
495: }
496:
497: /* ------------------------------------------------------------ */
498: public String getPath() {
499: if (_path == null) {
500: if (getParent() != null && getParent().getTag() != null)
501: _path = getParent().getPath() + "/" + _tag;
502: else
503: _path = "/" + _tag;
504: }
505: return _path;
506: }
507:
508: /* ------------------------------------------------------------ */
509: /**
510: * Get an array of element attributes.
511: */
512: public Attribute[] getAttributes() {
513: return _attrs;
514: }
515:
516: /* ------------------------------------------------------------ */
517: /**
518: * Get an element attribute.
519: *
520: * @return attribute or null.
521: */
522: public String getAttribute(String name) {
523: return getAttribute(name, null);
524: }
525:
526: /* ------------------------------------------------------------ */
527: /**
528: * Get an element attribute.
529: *
530: * @return attribute or null.
531: */
532: public String getAttribute(String name, String dft) {
533: if (_attrs == null || name == null)
534: return dft;
535: for (int i = 0; i < _attrs.length; i++)
536: if (name.equals(_attrs[i].getName()))
537: return _attrs[i].getValue();
538: return dft;
539: }
540:
541: /* ------------------------------------------------------------ */
542: /**
543: * Get the number of children nodes.
544: */
545: public int size() {
546: if (_list != null)
547: return _list.size();
548: return 0;
549: }
550:
551: /* ------------------------------------------------------------ */
552: /**
553: * Get the ith child node or content.
554: *
555: * @return Node or String.
556: */
557: public Object get(int i) {
558: if (_list != null)
559: return _list.get(i);
560: return null;
561: }
562:
563: /* ------------------------------------------------------------ */
564: /**
565: * Get the first child node with the tag.
566: *
567: * @param tag
568: * @return Node or null.
569: */
570: public Node get(String tag) {
571: if (_list != null) {
572: for (int i = 0; i < _list.size(); i++) {
573: Object o = _list.get(i);
574: if (o instanceof Node) {
575: Node n = (Node) o;
576: if (tag.equals(n._tag))
577: return n;
578: }
579: }
580: }
581: return null;
582: }
583:
584: /* ------------------------------------------------------------ */
585: public void add(int i, Object o) {
586: if (_list == null)
587: _list = new ArrayList();
588: if (o instanceof String) {
589: if (_lastString) {
590: int last = _list.size() - 1;
591: _list.set(last, (String) _list.get(last) + o);
592: } else
593: _list.add(i, o);
594: _lastString = true;
595: } else {
596: _lastString = false;
597: _list.add(i, o);
598: }
599: }
600:
601: /* ------------------------------------------------------------ */
602: public void clear() {
603: if (_list != null)
604: _list.clear();
605: _list = null;
606: }
607:
608: /* ------------------------------------------------------------ */
609: /**
610: * Get a tag as a string.
611: *
612: * @param tag The tag to get
613: * @param tags IF true, tags are included in the value.
614: * @param trim If true, trim the value.
615: * @return results of get(tag).toString(tags).
616: */
617: public String getString(String tag, boolean tags, boolean trim) {
618: Node node = get(tag);
619: if (node == null)
620: return null;
621: String s = node.toString(tags);
622: if (s != null && trim)
623: s = s.trim();
624: return s;
625: }
626:
627: /* ------------------------------------------------------------ */
628: public synchronized String toString() {
629: return toString(true);
630: }
631:
632: /* ------------------------------------------------------------ */
633: /**
634: * Convert to a string.
635: *
636: * @param tag If false, only _content is shown.
637: */
638: public synchronized String toString(boolean tag) {
639: StringBuffer buf = new StringBuffer();
640: synchronized (buf) {
641: toString(buf, tag);
642: return buf.toString();
643: }
644: }
645:
646: /* ------------------------------------------------------------ */
647: /**
648: * Convert to a string.
649: *
650: * @param tag If false, only _content is shown.
651: */
652: public synchronized String toString(boolean tag, boolean trim) {
653: String s = toString(tag);
654: if (s != null && trim)
655: s = s.trim();
656: return s;
657: }
658:
659: /* ------------------------------------------------------------ */
660: private synchronized void toString(StringBuffer buf, boolean tag) {
661: if (tag) {
662: buf.append("<");
663: buf.append(_tag);
664:
665: if (_attrs != null) {
666: for (int i = 0; i < _attrs.length; i++) {
667: buf.append(' ');
668: buf.append(_attrs[i].getName());
669: buf.append("=\"");
670: buf.append(_attrs[i].getValue());
671: buf.append("\"");
672: }
673: }
674: }
675:
676: if (_list != null) {
677: if (tag)
678: buf.append(">");
679: for (int i = 0; i < _list.size(); i++) {
680: Object o = _list.get(i);
681: if (o == null)
682: continue;
683: if (o instanceof Node)
684: ((Node) o).toString(buf, tag);
685: else
686: buf.append(o.toString());
687: }
688: if (tag) {
689: buf.append("</");
690: buf.append(_tag);
691: buf.append(">");
692: }
693: } else if (tag)
694: buf.append("/>");
695: }
696:
697: /* ------------------------------------------------------------ */
698: /**
699: * Iterator over named child nodes.
700: *
701: * @param tag The tag of the nodes.
702: * @return Iterator over all child nodes with the specified tag.
703: */
704: public Iterator iterator(final String tag) {
705: return new Iterator() {
706: int c = 0;
707: Node _node;
708:
709: /* -------------------------------------------------- */
710: public boolean hasNext() {
711: if (_node != null)
712: return true;
713: while (_list != null && c < _list.size()) {
714: Object o = _list.get(c);
715: if (o instanceof Node) {
716: Node n = (Node) o;
717: if (tag.equals(n._tag)) {
718: _node = n;
719: return true;
720: }
721: }
722: c++;
723: }
724: return false;
725: }
726:
727: /* -------------------------------------------------- */
728: public Object next() {
729: try {
730: if (hasNext())
731: return _node;
732: throw new NoSuchElementException();
733: } finally {
734: _node = null;
735: c++;
736: }
737: }
738:
739: /* -------------------------------------------------- */
740: public void remove() {
741: throw new UnsupportedOperationException(
742: "Not supported");
743: }
744: };
745: }
746: }
747: }
|