001: /*
002:
003: This software is OSI Certified Open Source Software.
004: OSI Certified is a certification mark of the Open Source Initiative.
005:
006: The license (Mozilla version 1.0) can be read at the MMBase site.
007: See http://www.MMBase.org/license
008:
009: */
010: package org.mmbase.util.xml;
011:
012: import java.util.*;
013: import java.util.regex.Pattern;
014:
015: import org.xml.sax.ErrorHandler;
016: import org.xml.sax.EntityResolver;
017: import org.xml.sax.InputSource;
018:
019: import org.w3c.dom.*;
020:
021: import javax.xml.parsers.DocumentBuilder;
022: import javax.xml.parsers.DocumentBuilderFactory;
023: import javax.xml.parsers.ParserConfigurationException;
024:
025: import org.mmbase.util.XMLEntityResolver;
026: import org.mmbase.util.XMLErrorHandler;
027:
028: import org.mmbase.util.logging.Logging;
029: import org.mmbase.util.logging.Logger;
030:
031: /**
 * The DocumentReader class provides methods for loading an XML document into memory.
033: * It serves as the base class for DocumentWriter (which adds ways to write a document), and
034: * XMLBasicReader, which adds path-like methods with which to retrieve elements.
035: *
036: * This can also be a class for general static dom utilities.
037: *
038: *
039: * @author Case Roule
040: * @author Rico Jansen
041: * @author Pierre van Rooden
042: * @author Michiel Meeuwissen
043: * @version $Id: DocumentReader.java,v 1.39 2008/03/11 11:45:28 michiel Exp $
044: * @since MMBase-1.7
045: */
046: public class DocumentReader {
047: private static Logger log = Logging
048: .getLoggerInstance(DocumentReader.class);
049:
050: /** for the document builder of javax.xml. */
051: private static Map<String, DocumentBuilder> documentBuilders = Collections
052: .synchronizedMap(new HashMap<String, DocumentBuilder>());
053:
054: protected static final String FILENOTFOUND = "FILENOTFOUND://";
055:
056: /** Public ID of the Error DTD version 1.0 */
057: public static final String PUBLIC_ID_ERROR_1_0 = "-//MMBase//DTD error 1.0//EN";
058: /** DTD resource filename of the Error DTD version 1.0 */
059: public static final String DTD_ERROR_1_0 = "error_1_0.dtd";
060:
061: /** Public ID of the most recent Error DTD */
062: public static final String PUBLIC_ID_ERROR = PUBLIC_ID_ERROR_1_0;
063: /** DTD respource filename of the most recent Error DTD */
064: public static final String DTD_ERROR = DTD_ERROR_1_0;
065:
066: /**
067: * Register the Public Ids for DTDs used by XMLBasicReader
068: * This method is called by XMLEntityResolver.
069: */
070: public static void registerPublicIDs() {
071: XMLEntityResolver.registerPublicID(PUBLIC_ID_ERROR_1_0,
072: DTD_ERROR_1_0, DocumentReader.class);
073: }
074:
075: protected Document document;
076:
077: private String systemId;
078:
079: static UtilReader.PropertiesMap<String> utilProperties = null;
080:
081: /**
082: * Returns the default setting for validation for DocumentReaders.
083: * @return true if validation is on
084: */
085: protected static final boolean validate() {
086: Object validate = utilProperties == null ? null
087: : utilProperties.get("validate");
088: return validate == null || validate.equals("true");
089: }
090:
091: /**
092: * Whether to validate given a request for that. So, the request is followed, unless it is configured to 'never' validate.
093: * @since MMBase-1.8
094: */
095: protected static final boolean validate(boolean requested) {
096: Object validate = utilProperties == null ? null
097: : utilProperties.get("validate");
098: if (validate != null && validate.equals("never"))
099: return false;
100: return requested;
101: }
102:
/**
 * Creates a document reader without a document: both the document and the
 * system id are left unset (null).
 */
protected DocumentReader() {
    // nothing to initialise
}
108:
/**
 * Reads the document from the given source, using the default validation
 * setting (see {@link #validate()}) and no resolve base class.
 *
 * @param source the input source from which to read the document
 */
public DocumentReader(InputSource source) {
    this(source, validate(), null);
}
116:
/**
 * Reads the document from the given source, without a resolve base class.
 *
 * @param source the input source from which to read the document
 * @param validating whether to validate the document
 */
public DocumentReader(InputSource source, boolean validating) {
    this(source, validating, null);
}
125:
/**
 * Reads the document from the given source, using the default validation
 * setting (see {@link #validate()}).
 * The resolve class indicates the package in which the DTD of the document is to
 * be found: the DTD should be in the resources package under the package of the
 * class passed.
 *
 * @param source the input source from which to read the document
 * @param resolveBase the base class whose package is used to resolve DTDs, null if unknown
 */
public DocumentReader(InputSource source, Class<?> resolveBase) {
    this(source, validate(), resolveBase);
}
136:
137: /**
138: * Constructs the document by reading it from a source.
139: * You can pass a resolve class to this constructor, allowing you to indicate the package in which the dtd
140: * of the document read is to be found. The dtd sould be in the resources package under the package of the class passed.
141: * @param source the input source from which to read the document
142: * @param validating whether to validate the document
143: * @param resolveBase the base class whose package is used to resolve dtds, set to null if unknown
144: */
145: public DocumentReader(InputSource source, boolean validating,
146: Class<?> resolveBase) {
147: if (source == null) {
148: throw new IllegalArgumentException(
149: "InputSource cannot be null");
150: }
151: try {
152: systemId = source.getSystemId();
153: XMLEntityResolver resolver = null;
154: if (resolveBase != null)
155: resolver = new XMLEntityResolver(validating,
156: resolveBase);
157: DocumentBuilder dbuilder = getDocumentBuilder(validating,
158: null/* no error handler */, resolver);
159: if (dbuilder == null)
160: throw new RuntimeException(
161: "failure retrieving document builder");
162: if (log != null && log.isDebugEnabled()) {
163: log.debug("Reading " + source.getSystemId());
164: }
165: document = dbuilder.parse(source);
166: } catch (org.xml.sax.SAXException se) {
167: throw new RuntimeException("failure reading document: "
168: + source.getSystemId() + "\n"
169: + Logging.stackTrace(se));
170: } catch (java.io.IOException ioe) {
171: throw new RuntimeException("failure reading document: "
172: + source.getSystemId() + "\n" + ioe, ioe);
173: }
174: }
175:
/**
 * Wraps an already existing DOM document. The system id is left unset.
 *
 * @param doc the document this reader will work on
 * @since MMBase-1.8
 */
public DocumentReader(Document doc) {
    this.document = doc;
}
182:
183: private static boolean warnedJAXP12 = false;
184:
185: /**
186: * Creates a DocumentBuilder using SAX.
187: * @param validating if true, the documentbuilder will validate documents read
188: * @param xsd Whether to use XSD for validating
189: * @param handler a ErrorHandler class to use for catching parsing errors, pass null to use a default handler
190: * @param resolver a EntityResolver class used for resolving the document's dtd, pass null to use a default resolver
191: * @return a DocumentBuilder instance, or null if none could be created
192: */
193: private static DocumentBuilder createDocumentBuilder(
194: boolean validating, boolean xsd, ErrorHandler handler,
195: EntityResolver resolver) {
196: DocumentBuilder db;
197: if (handler == null)
198: handler = new XMLErrorHandler();
199: if (resolver == null)
200: resolver = new XMLEntityResolver(validating);
201: try {
202: // get a new documentbuilder...
203: DocumentBuilderFactory dfactory = DocumentBuilderFactory
204: .newInstance();
205: // get document builder AFTER setting the validation
206: dfactory.setValidating(validating);
207: dfactory.setXIncludeAware(true);
208: if (validating && xsd) {
209: try {
210: dfactory
211: .setAttribute(
212: "http://java.sun.com/xml/jaxp/properties/schemaLanguage",
213: "http://www.w3.org/2001/XMLSchema");
214: } catch (IllegalArgumentException iae) {
215: if (!warnedJAXP12) {
216: log
217: .warn(
218: "The XML parser does not support JAXP 1.2, XSD validation will not work.",
219: iae);
220: warnedJAXP12 = true;
221: }
222: }
223: }
224: dfactory.setNamespaceAware(true);
225:
226: db = dfactory.newDocumentBuilder();
227:
228: db.setErrorHandler(handler);
229:
230: // set the entity resolver... which tell us where to find the dtd's
231: db.setEntityResolver(resolver);
232:
233: } catch (ParserConfigurationException pce) {
234: log
235: .error("a DocumentBuilder cannot be created which satisfies the configuration requested");
236: log.error(Logging.stackTrace(pce));
237: return null;
238: }
239: return db;
240: }
241:
242: /**
243: * Creates a DocumentBuilder with default settings for handler, resolver, or validation,
244: * obtaining it from the cache if available.
245: * @return a DocumentBuilder instance, or null if none could be created
246: */
247: public static DocumentBuilder getDocumentBuilder() {
248: return getDocumentBuilder(validate(), null, null);
249: }
250:
251: /**
252: * Obtain a DocumentBuilder
253: */
254: public static DocumentBuilder getDocumentBuilder(boolean validating) {
255: return DocumentReader
256: .getDocumentBuilder(validating, null, null);
257: }
258:
259: /**
260: * See {@link #getDocumentBuilder(boolean, ErrorHandler, EntityResolver)}
261: */
262: public static DocumentBuilder getDocumentBuilder(
263: boolean validating, ErrorHandler handler,
264: EntityResolver resolver) {
265: return getDocumentBuilder(validating, false, handler, resolver);
266: }
267:
268: /**
269: * Creates a DocumentBuilder.
270: * DocumentBuilders that use the default error handler or entity resolver are cached (one for validating,
271: * one for non-validating document buidlers).
272: * @param validating if true, the documentbuilder will validate documents read
273: * @param xsd if true, validating will be done by an XML schema definiton.
274: * @param handler a ErrorHandler class to use for catching parsing errors, pass null to use the default handler
275: * @param resolver a EntityResolver class used for resolving the document's dtd, pass null to use the default resolver
276: * @return a DocumentBuilder instance, or null if none could be created
277: * @since MMBase-1.8.
278: */
279: public static DocumentBuilder getDocumentBuilder(
280: boolean validating, boolean xsd, ErrorHandler handler,
281: EntityResolver resolver) {
282: validating = validate(validating);
283: if (handler == null && resolver == null) {
284: String key = "" + validating + xsd;
285: DocumentBuilder db = documentBuilders.get(key);
286: if (db == null) {
287: db = createDocumentBuilder(validating, xsd, null, null);
288: documentBuilders.put(key, db);
289: }
290: return db;
291: } else {
292: return createDocumentBuilder(validating, xsd, handler,
293: resolver);
294: }
295: }
296:
297: /**
298: * Return the text value of a node.
299: * It includes the contents of all child textnodes and CDATA sections, but ignores
300: * everything else (such as comments)
301: * The code trims excessive whitespace unless it is included in a CDATA section.
302: *
303: * @param n the Node whose value to determine
304: * @return a String representing the node's textual value
305: */
306: public static String getNodeTextValue(Node n) {
307: return getNodeTextValue(n, true);
308: }
309:
310: /**
311: * @since MMBase-1.8.5
312: */
313: public static String getNodeTextValue(Node n, boolean trim) {
314: NodeList nl = n.getChildNodes();
315: StringBuilder res = new StringBuilder();
316: for (int i = 0; i < nl.getLength(); i++) {
317: Node textnode = nl.item(i);
318: if (textnode.getNodeType() == Node.TEXT_NODE) {
319: String s = textnode.getNodeValue();
320: if (trim)
321: s = s.trim();
322: res.append(s);
323: } else if (textnode.getNodeType() == Node.CDATA_SECTION_NODE) {
324: res.append(textnode.getNodeValue());
325: }
326: }
327: return res.toString();
328: }
329:
330: /**
331: * @since MMBase-1.8.1
332: */
333: public static void setNodeTextValue(Node n, String value) {
334: Node child = n.getFirstChild();
335: while (child != null) {
336: Node next = child.getNextSibling();
337: n.removeChild(child);
338: child = next;
339: }
340: Text text = n.getOwnerDocument().createTextNode(value);
341: n.appendChild(text);
342: }
343:
344: /**
345: * @since MMBase-1.8.5
346: */
347: public static void setPrefix(Document d, String ns, String prefix) {
348: NodeList nl = d.getElementsByTagName("*");
349: for (int i = 0; i < nl.getLength(); i++) {
350: Node element = nl.item(i);
351: if (ns.equals(element.getNamespaceURI())) {
352: element.setPrefix(prefix);
353: }
354: }
355: }
356:
357: /**
358: * Returns whether an element has a certain attribute, either an unqualified attribute or an attribute that fits in the
359: * passed namespace
360: */
361: static public boolean hasAttribute(Element element,
362: String nameSpace, String localName) {
363: return element.hasAttributeNS(nameSpace, localName)
364: || element.hasAttribute(localName);
365: }
366:
367: /**
368: * Returns the value of a certain attribute, either an unqualified attribute or an attribute that fits in the
369: * passed namespace
370: */
371: static public String getAttribute(Element element,
372: String nameSpace, String localName) {
373: if (element.hasAttributeNS(nameSpace, localName)) {
374: return element.getAttributeNS(nameSpace, localName);
375: } else {
376: return element.getAttribute(localName);
377: }
378: }
379:
380: /**
381: * Utility method to make a document of an element.
382: * @since MMBase-1.8
383: */
384: static public Document toDocument(Element element) {
385: DocumentBuilder documentBuilder = getDocumentBuilder(false,
386: null, null);
387: DOMImplementation impl = documentBuilder.getDOMImplementation();
388: Document document = impl.createDocument(element
389: .getNamespaceURI(), element.getLocalName(), null);
390: Element dest = document.getDocumentElement();
391: Element copy = (Element) document.importNode(element, false);
392: NamedNodeMap attributes = copy.getAttributes();
393: for (int i = 0; i < attributes.getLength(); i++) {
394: Attr attribute = (Attr) (attributes.item(i).cloneNode(true));
395: dest.setAttributeNode(attribute);
396:
397: }
398: NodeList childs = element.getChildNodes();
399: for (int i = 0; i < childs.getLength(); i++) {
400: Node child = document.importNode(childs.item(i), true);
401: dest.appendChild(child);
402: }
403: document.normalize();
404: return document;
405: }
406:
407: /**
408: * Appends a child to a parent at the right position. The right position is defined by a comma
409: * separated list of regular expressions. If the the child matches the last element of the
410: * path, then the child is appended after similer childs, if not, then it will be appended
411: * before them.
412: *
413: * @param parent The parent element, to which a new child will be added
414: * @param newChild this new child
415: * @param path The beforementioned comma separated list of regexps. See also {@link
416: * java.util.regex.Pattern};
417: * Namespace prefixes are ignored.
418: * @since MMBase-1.8
419: */
420: static public void appendChild(Element parent, Element newChild,
421: String path) {
422: String[] p = path.split(",");
423: int i = 0;
424: Node refChild = null;
425: NodeList childs = parent.getChildNodes();
426: int j = 0;
427: Pattern pattern = null;
428: if (p.length > 0)
429: pattern = Pattern.compile("\\A" + p[i] + "\\z");
430: boolean matching = false;
431: while (j < childs.getLength() && i < p.length) {
432: if (childs.item(j) instanceof Element) {
433: Element child = (Element) childs.item(j);
434: if (pattern.matcher(child.getLocalName()).matches()) {
435: j++;
436: refChild = childs.item(j);
437: matching = true;
438: } else {
439: if (!matching) { // append at the beginning, because actual child list does not start llike path
440: refChild = childs.item(j);
441: break;
442: }
443: i++;
444: pattern = i < p.length ? Pattern.compile("\\A"
445: + p[i] + "\\z") : null;
446: }
447: } else {
448: j++;
449: }
450: }
451: parent.insertBefore(newChild, refChild);
452: }
453:
454: /**
455: * Returns the systemID of the InputSource used to read the document.
456: * This is generally the document's file path.
457: * @return the systemID as a String
458: *
459: * @since MMBase-1.8
460: */
461: public String getSystemId() {
462: return systemId;
463: }
464:
465: /**
466: * @since MMBase-1.8
467: */
468: public void setSystemId(String url) {
469: systemId = url;
470: }
471:
472: /**
473: * @param e Element
474: * @return Tag name of the element
475: */
476: public String getElementName(Element e) {
477: return e.getLocalName();
478: }
479:
480: /**
481: * @param path Path to the element
482: * @param attr Attribute name
483: * @return Value of attribute
484: */
485: public String getElementAttributeValue(String path, String attr) {
486: return getElementAttributeValue(getElementByPath(path), attr);
487: }
488:
489: /**
490: * @param e Element
491: * @param attr Attribute name
492: * @return Value of attribute
493: */
494: public String getElementAttributeValue(Element e, String attr) {
495: if (e == null) {
496: return "";
497: } else {
498: return e.getAttribute(attr);
499: }
500: }
501:
502: /**
503: * Determine the root element of the contained document
504: * @return root element
505: * @deprecated
506: */
507: public Element getRootElement() {
508: if (document == null) {
509: log
510: .error("Document is not defined, cannot get root element");
511: }
512: return document.getDocumentElement();
513: }
514:
515: /**
516: * @param path Dot-separated list of tags describing path from root element to requested element.
517: * NB the path starts with the name of the root element.
518: * @return Leaf element of the path
519: */
520: public Element getElementByPath(String path) {
521: if (document == null) {
522: log.error("Document is not defined, cannot get " + path);
523: }
524: return getElementByPath(document.getDocumentElement(), path);
525: }
526:
527: /**
528: * @param e Element from which the "relative" path is starting.
529: * NB the path starts with the name of the root element.
530: * @param path Dot-separated list of tags describing path from root element to requested element
531: * @return Leaf element of the path
532: */
533: public Element getElementByPath(Element e, String path) {
534: StringTokenizer st = new StringTokenizer(path, ".");
535: if (!st.hasMoreTokens()) {
536: // faulty path
537: log.error("No tokens in path");
538: return null;
539: } else {
540: String root = st.nextToken();
541: if (e.getLocalName().equals("error")) {
542: // path should start with document root element
543: log.error("Error occurred : (" + getElementValue(e)
544: + ")");
545: return null;
546: } else if (!e.getLocalName().equals(root)) {
547: // path should start with document root element
548: log.error("path [" + path + "] with root (" + root
549: + ") doesn't start with root element ("
550: + e.getLocalName() + "): incorrect xml file"
551: + "(" + getSystemId() + ")");
552: return null;
553: }
554: OUTER: while (st.hasMoreTokens()) {
555: String tag = st.nextToken();
556: NodeList nl = e.getChildNodes();
557: for (int i = 0; i < nl.getLength(); i++) {
558: if (!(nl.item(i) instanceof Element))
559: continue;
560: e = (Element) nl.item(i);
561: if (e.getLocalName().equals(tag))
562: continue OUTER;
563: }
564: // Handle error!
565: return null;
566: }
567: return e;
568: }
569: }
570:
571: /**
572: * @param path Path to the element
573: * @return Text value of element
574: */
575: public String getElementValue(String path) {
576: return getElementValue(getElementByPath(path));
577: }
578:
579: /**
580: * @param e Element
581: * @return Text value of element
582: */
583: public String getElementValue(Element e) {
584: if (e == null) {
585: return "";
586: } else {
587: return getNodeTextValue(e);
588: }
589: }
590:
591: /**
592: * @param path Path to the element
593: * @return a <code>List</code> of child elements
594: */
595: public List<Element> getChildElements(String path) {
596: return getChildElements(getElementByPath(path));
597: }
598:
599: /**
600: * @param e Element
601: * @return a <code>List</code> of child elements
602: */
603: public List<Element> getChildElements(Element e) {
604: return getChildElements(e, "*");
605: }
606:
607: /**
608: * @param path Path to the element
609: * @param tag tag to match ("*" means all tags")
610: * @return a <code>List</code> of child elements with the given tag
611: */
612: public List<Element> getChildElements(String path, String tag) {
613: return getChildElements(getElementByPath(path), tag);
614: }
615:
616: /**
617: * @param e Element
618: * @param tag tag to match ("*" means all tags")
619: * @return a <code>List</code> of child elements with the given tag
620: */
621: public List<Element> getChildElements(Element e, String tag) {
622: List<Element> v = new ArrayList<Element>();
623: boolean ignoretag = tag.equals("*");
624: if (e != null) {
625: NodeList nl = e.getChildNodes();
626: for (int i = 0; i < nl.getLength(); i++) {
627: Node n = nl.item(i);
628: if (n.getNodeType() == Node.ELEMENT_NODE
629: && (ignoretag || ((Element) n).getLocalName()
630: .equalsIgnoreCase(tag))) {
631: v.add((Element) n);
632: }
633: }
634: }
635: return v;
636: }
637:
638: public static void main(String argv[]) throws Exception {
639: org.mmbase.util.ResourceLoader.getSystemRoot().getDocument(
640: argv[0]);
641: }
642: }
|