0001: /*
0002: * Copyright 1999-2004 The Apache Software Foundation.
0003: *
0004: * Licensed under the Apache License, Version 2.0 (the "License");
0005: * you may not use this file except in compliance with the License.
0006: * You may obtain a copy of the License at
0007: *
0008: * http://www.apache.org/licenses/LICENSE-2.0
0009: *
0010: * Unless required by applicable law or agreed to in writing, software
0011: * distributed under the License is distributed on an "AS IS" BASIS,
0012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013: * See the License for the specific language governing permissions and
0014: * limitations under the License.
0015: */
0016: /*
0017: * $Id: DOMHelper.java,v 1.7 2005/06/07 15:35:52 mkwan Exp $
0018: */
0019: package org.apache.xml.utils;
0020:
0021: import java.util.Hashtable;
0022: import java.util.Vector;
0023:
0024: import javax.xml.XMLConstants;
0025: import javax.xml.parsers.DocumentBuilder;
0026: import javax.xml.parsers.DocumentBuilderFactory;
0027: import javax.xml.parsers.ParserConfigurationException;
0028:
0029: import org.apache.xml.dtm.ref.DTMNodeProxy;
0030: import org.apache.xml.res.XMLErrorResources;
0031: import org.apache.xml.res.XMLMessages;
0032:
0033: import org.w3c.dom.Attr;
0034: import org.w3c.dom.DOMImplementation;
0035: import org.w3c.dom.Document;
0036: import org.w3c.dom.DocumentType;
0037: import org.w3c.dom.Element;
0038: import org.w3c.dom.Entity;
0039: import org.w3c.dom.NamedNodeMap;
0040: import org.w3c.dom.Node;
0041: import org.w3c.dom.Text;
0042:
/**
 * This class provides a front-end to DOM implementations, providing
 * a number of utility functions that either aren't yet standardized
 * by the DOM spec or that are defined in optional DOM modules and
 * hence may not be present in all DOMs.
 *
 * @deprecated Since the introduction of the DTM, this class will be removed.
 */
0050: public class DOMHelper {
0051:
0052: /**
0053: * DOM Level 1 did not have a standard mechanism for creating a new
0054: * Document object. This function provides a DOM-implementation-independent
0055: * abstraction for that for that concept. It's typically used when
0056: * outputting a new DOM as the result of an operation.
0057: * <p>
0058: * TODO: This isn't directly compatable with DOM Level 2.
0059: * The Level 2 createDocument call also creates the root
0060: * element, and thus requires that you know what that element will be
0061: * before creating the Document. We should think about whether we want
0062: * to change this code, and the callers, so we can use the DOM's own
0063: * method. (It's also possible that DOM Level 3 may relax this
0064: * sequence, but you may give up some intelligence in the DOM by
0065: * doing so; the intent was that knowing the document type and root
0066: * element might let the DOM automatically switch to a specialized
0067: * subclass for particular kinds of documents.)
0068: *
0069: * @param isSecureProcessing state of the secure processing feature.
0070: * @return The newly created DOM Document object, with no children, or
0071: * null if we can't find a DOM implementation that permits creating
0072: * new empty Documents.
0073: */
0074: public static Document createDocument(boolean isSecureProcessing) {
0075:
0076: try {
0077:
0078: // Use an implementation of the JAVA API for XML Parsing 1.0 to
0079: // create a DOM Document node to contain the result.
0080: DocumentBuilderFactory dfactory = DocumentBuilderFactory
0081: .newInstance();
0082:
0083: dfactory.setNamespaceAware(true);
0084: dfactory.setValidating(true);
0085:
0086: if (isSecureProcessing) {
0087: try {
0088: dfactory.setFeature(
0089: XMLConstants.FEATURE_SECURE_PROCESSING,
0090: true);
0091: } catch (ParserConfigurationException pce) {
0092: }
0093: }
0094:
0095: DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
0096: Document outNode = docBuilder.newDocument();
0097:
0098: return outNode;
0099: } catch (ParserConfigurationException pce) {
0100: throw new RuntimeException(XMLMessages.createXMLMessage(
0101: XMLErrorResources.ER_CREATEDOCUMENT_NOT_SUPPORTED,
0102: null)); //"createDocument() not supported in XPathContext!");
0103:
0104: // return null;
0105: }
0106: }
0107:
0108: /**
0109: * DOM Level 1 did not have a standard mechanism for creating a new
0110: * Document object. This function provides a DOM-implementation-independent
0111: * abstraction for that for that concept. It's typically used when
0112: * outputting a new DOM as the result of an operation.
0113: *
0114: * @return The newly created DOM Document object, with no children, or
0115: * null if we can't find a DOM implementation that permits creating
0116: * new empty Documents.
0117: */
0118: public static Document createDocument() {
0119: return createDocument(false);
0120: }
0121:
0122: /**
0123: * Tells, through the combination of the default-space attribute
0124: * on xsl:stylesheet, xsl:strip-space, xsl:preserve-space, and the
0125: * xml:space attribute, whether or not extra whitespace should be stripped
0126: * from the node. Literal elements from template elements should
0127: * <em>not</em> be tested with this function.
0128: * @param textNode A text node from the source tree.
0129: * @return true if the text node should be stripped of extra whitespace.
0130: *
0131: * @throws javax.xml.transform.TransformerException
0132: * @xsl.usage advanced
0133: */
0134: public boolean shouldStripSourceNode(Node textNode)
0135: throws javax.xml.transform.TransformerException {
0136:
0137: // return (null == m_envSupport) ? false : m_envSupport.shouldStripSourceNode(textNode);
0138: return false;
0139: }
0140:
0141: /**
0142: * Supports the XPath function GenerateID by returning a unique
0143: * identifier string for any given DOM Node.
0144: * <p>
0145: * Warning: The base implementation uses the Node object's hashCode(),
0146: * which is NOT guaranteed to be unique. If that method hasn't been
0147: * overridden in this DOM ipmlementation, most Java implementions will
0148: * derive it from the object's address and should be OK... but if
0149: * your DOM uses a different definition of hashCode (eg hashing the
0150: * contents of the subtree), or if your DOM may have multiple objects
0151: * that represent a single Node in the data structure (eg via proxying),
0152: * you may need to find another way to assign a unique identifier.
0153: * <p>
0154: * Also, be aware that if nodes are destroyed and recreated, there is
0155: * an open issue regarding whether an ID may be reused. Currently
0156: * we're assuming that the input document is stable for the duration
0157: * of the XPath/XSLT operation, so this shouldn't arise in this context.
0158: * <p>
0159: * (DOM Level 3 is investigating providing a unique node "key", but
0160: * that won't help Level 1 and Level 2 implementations.)
0161: *
0162: * @param node whose identifier you want to obtain
0163: *
0164: * @return a string which should be different for every Node object.
0165: */
0166: public String getUniqueID(Node node) {
0167: return "N" + Integer.toHexString(node.hashCode()).toUpperCase();
0168: }
0169:
0170: /**
0171: * Figure out whether node2 should be considered as being later
0172: * in the document than node1, in Document Order as defined
0173: * by the XPath model. This may not agree with the ordering defined
0174: * by other XML applications.
0175: * <p>
0176: * There are some cases where ordering isn't defined, and neither are
0177: * the results of this function -- though we'll generally return true.
0178: *
0179: * TODO: Make sure this does the right thing with attribute nodes!!!
0180: *
0181: * @param node1 DOM Node to perform position comparison on.
0182: * @param node2 DOM Node to perform position comparison on .
0183: *
0184: * @return false if node2 comes before node1, otherwise return true.
0185: * You can think of this as
0186: * <code>(node1.documentOrderPosition <= node2.documentOrderPosition)</code>.
0187: */
0188: public static boolean isNodeAfter(Node node1, Node node2) {
0189: if (node1 == node2 || isNodeTheSame(node1, node2))
0190: return true;
0191:
0192: // Default return value, if there is no defined ordering
0193: boolean isNodeAfter = true;
0194:
0195: Node parent1 = getParentOfNode(node1);
0196: Node parent2 = getParentOfNode(node2);
0197:
0198: // Optimize for most common case
0199: if (parent1 == parent2 || isNodeTheSame(parent1, parent2)) // then we know they are siblings
0200: {
0201: if (null != parent1)
0202: isNodeAfter = isNodeAfterSibling(parent1, node1, node2);
0203: else {
0204: // If both parents are null, ordering is not defined.
0205: // We're returning a value in lieu of throwing an exception.
0206: // Not a case we expect to arise in XPath, but beware if you
0207: // try to reuse this method.
0208:
0209: // We can just fall through in this case, which allows us
0210: // to hit the debugging code at the end of the function.
0211: //return isNodeAfter;
0212: }
0213: } else {
0214:
0215: // General strategy: Figure out the lengths of the two
0216: // ancestor chains, reconcile the lengths, and look for
0217: // the lowest common ancestor. If that ancestor is one of
0218: // the nodes being compared, it comes before the other.
0219: // Otherwise perform a sibling compare.
0220: //
0221: // NOTE: If no common ancestor is found, ordering is undefined
0222: // and we return the default value of isNodeAfter.
0223:
0224: // Count parents in each ancestor chain
0225: int nParents1 = 2, nParents2 = 2; // include node & parent obtained above
0226:
0227: while (parent1 != null) {
0228: nParents1++;
0229:
0230: parent1 = getParentOfNode(parent1);
0231: }
0232:
0233: while (parent2 != null) {
0234: nParents2++;
0235:
0236: parent2 = getParentOfNode(parent2);
0237: }
0238:
0239: // Initially assume scan for common ancestor starts with
0240: // the input nodes.
0241: Node startNode1 = node1, startNode2 = node2;
0242:
0243: // If one ancestor chain is longer, adjust its start point
0244: // so we're comparing at the same depths
0245: if (nParents1 < nParents2) {
0246: // Adjust startNode2 to depth of startNode1
0247: int adjust = nParents2 - nParents1;
0248:
0249: for (int i = 0; i < adjust; i++) {
0250: startNode2 = getParentOfNode(startNode2);
0251: }
0252: } else if (nParents1 > nParents2) {
0253: // adjust startNode1 to depth of startNode2
0254: int adjust = nParents1 - nParents2;
0255:
0256: for (int i = 0; i < adjust; i++) {
0257: startNode1 = getParentOfNode(startNode1);
0258: }
0259: }
0260:
0261: Node prevChild1 = null, prevChild2 = null; // so we can "back up"
0262:
0263: // Loop up the ancestor chain looking for common parent
0264: while (null != startNode1) {
0265: if (startNode1 == startNode2
0266: || isNodeTheSame(startNode1, startNode2)) // common parent?
0267: {
0268: if (null == prevChild1) // first time in loop?
0269: {
0270:
0271: // Edge condition: one is the ancestor of the other.
0272: isNodeAfter = (nParents1 < nParents2) ? true
0273: : false;
0274:
0275: break; // from while loop
0276: } else {
0277: // Compare ancestors below lowest-common as siblings
0278: isNodeAfter = isNodeAfterSibling(startNode1,
0279: prevChild1, prevChild2);
0280:
0281: break; // from while loop
0282: }
0283: } // end if(startNode1 == startNode2)
0284:
0285: // Move up one level and try again
0286: prevChild1 = startNode1;
0287: startNode1 = getParentOfNode(startNode1);
0288: prevChild2 = startNode2;
0289: startNode2 = getParentOfNode(startNode2);
0290: } // end while(parents exist to examine)
0291: } // end big else (not immediate siblings)
0292:
0293: // WARNING: The following diagnostic won't report the early
0294: // "same node" case. Fix if/when needed.
0295:
0296: /* -- please do not remove... very useful for diagnostics --
0297: System.out.println("node1 = "+node1.getNodeName()+"("+node1.getNodeType()+")"+
0298: ", node2 = "+node2.getNodeName()
0299: +"("+node2.getNodeType()+")"+
0300: ", isNodeAfter = "+isNodeAfter); */
0301: return isNodeAfter;
0302: } // end isNodeAfter(Node node1, Node node2)
0303:
0304: /**
0305: * Use DTMNodeProxy to determine whether two nodes are the same.
0306: *
0307: * @param node1 The first DOM node to compare.
0308: * @param node2 The second DOM node to compare.
0309: * @return true if the two nodes are the same.
0310: */
0311: public static boolean isNodeTheSame(Node node1, Node node2) {
0312: if (node1 instanceof DTMNodeProxy
0313: && node2 instanceof DTMNodeProxy)
0314: return ((DTMNodeProxy) node1).equals((DTMNodeProxy) node2);
0315: else
0316: return (node1 == node2);
0317: }
0318:
0319: /**
0320: * Figure out if child2 is after child1 in document order.
0321: * <p>
0322: * Warning: Some aspects of "document order" are not well defined.
0323: * For example, the order of attributes is considered
0324: * meaningless in XML, and the order reported by our model will
0325: * be consistant for a given invocation but may not
0326: * match that of either the source file or the serialized output.
0327: *
0328: * @param parent Must be the parent of both child1 and child2.
0329: * @param child1 Must be the child of parent and not equal to child2.
0330: * @param child2 Must be the child of parent and not equal to child1.
0331: * @return true if child 2 is after child1 in document order.
0332: */
0333: private static boolean isNodeAfterSibling(Node parent, Node child1,
0334: Node child2) {
0335:
0336: boolean isNodeAfterSibling = false;
0337: short child1type = child1.getNodeType();
0338: short child2type = child2.getNodeType();
0339:
0340: if ((Node.ATTRIBUTE_NODE != child1type)
0341: && (Node.ATTRIBUTE_NODE == child2type)) {
0342:
0343: // always sort attributes before non-attributes.
0344: isNodeAfterSibling = false;
0345: } else if ((Node.ATTRIBUTE_NODE == child1type)
0346: && (Node.ATTRIBUTE_NODE != child2type)) {
0347:
0348: // always sort attributes before non-attributes.
0349: isNodeAfterSibling = true;
0350: } else if (Node.ATTRIBUTE_NODE == child1type) {
0351: NamedNodeMap children = parent.getAttributes();
0352: int nNodes = children.getLength();
0353: boolean found1 = false, found2 = false;
0354:
0355: // Count from the start until we find one or the other.
0356: for (int i = 0; i < nNodes; i++) {
0357: Node child = children.item(i);
0358:
0359: if (child1 == child || isNodeTheSame(child1, child)) {
0360: if (found2) {
0361: isNodeAfterSibling = false;
0362:
0363: break;
0364: }
0365:
0366: found1 = true;
0367: } else if (child2 == child
0368: || isNodeTheSame(child2, child)) {
0369: if (found1) {
0370: isNodeAfterSibling = true;
0371:
0372: break;
0373: }
0374:
0375: found2 = true;
0376: }
0377: }
0378: } else {
0379: // TODO: Check performance of alternate solution:
0380: // There are two choices here: Count from the start of
0381: // the document until we find one or the other, or count
0382: // from one until we find or fail to find the other.
0383: // Either can wind up scanning all the siblings in the worst
0384: // case, which on a wide document can be a lot of work but
0385: // is more typically is a short list.
0386: // Scanning from the start involves two tests per iteration,
0387: // but it isn't clear that scanning from the middle doesn't
0388: // yield more iterations on average.
0389: // We should run some testcases.
0390: Node child = parent.getFirstChild();
0391: boolean found1 = false, found2 = false;
0392:
0393: while (null != child) {
0394:
0395: // Node child = children.item(i);
0396: if (child1 == child || isNodeTheSame(child1, child)) {
0397: if (found2) {
0398: isNodeAfterSibling = false;
0399:
0400: break;
0401: }
0402:
0403: found1 = true;
0404: } else if (child2 == child
0405: || isNodeTheSame(child2, child)) {
0406: if (found1) {
0407: isNodeAfterSibling = true;
0408:
0409: break;
0410: }
0411:
0412: found2 = true;
0413: }
0414:
0415: child = child.getNextSibling();
0416: }
0417: }
0418:
0419: return isNodeAfterSibling;
0420: } // end isNodeAfterSibling(Node parent, Node child1, Node child2)
0421:
0422: //==========================================================
0423: // SECTION: Namespace resolution
0424: //==========================================================
0425:
0426: /**
0427: * Get the depth level of this node in the tree (equals 1 for
0428: * a parentless node).
0429: *
0430: * @param n Node to be examined.
0431: * @return the number of ancestors, plus one
0432: * @xsl.usage internal
0433: */
0434: public short getLevel(Node n) {
0435:
0436: short level = 1;
0437:
0438: while (null != (n = getParentOfNode(n))) {
0439: level++;
0440: }
0441:
0442: return level;
0443: }
0444:
0445: /**
0446: * Given an XML Namespace prefix and a context in which the prefix
0447: * is to be evaluated, return the Namespace Name this prefix was
0448: * bound to. Note that DOM Level 3 is expected to provide a version of
0449: * this which deals with the DOM's "early binding" behavior.
0450: *
0451: * Default handling:
0452: *
0453: * @param prefix String containing namespace prefix to be resolved,
0454: * without the ':' which separates it from the localname when used
0455: * in a Node Name. The empty sting signifies the default namespace
0456: * at this point in the document.
0457: * @param namespaceContext Element which provides context for resolution.
0458: * (We could extend this to work for other nodes by first seeking their
0459: * nearest Element ancestor.)
0460: *
0461: * @return a String containing the Namespace URI which this prefix
0462: * represents in the specified context.
0463: */
0464: public String getNamespaceForPrefix(String prefix,
0465: Element namespaceContext) {
0466:
0467: int type;
0468: Node parent = namespaceContext;
0469: String namespace = null;
0470:
0471: if (prefix.equals("xml")) {
0472: namespace = QName.S_XMLNAMESPACEURI; // Hardcoded, per Namespace spec
0473: } else if (prefix.equals("xmlns")) {
0474: // Hardcoded in the DOM spec, expected to be adopted by
0475: // Namespace spec. NOTE: Namespace declarations _must_ use
0476: // the xmlns: prefix; other prefixes declared as belonging
0477: // to this namespace will not be recognized and should
0478: // probably be rejected by parsers as erroneous declarations.
0479: namespace = "http://www.w3.org/2000/xmlns/";
0480: } else {
0481: // Attribute name for this prefix's declaration
0482: String declname = (prefix == "") ? "xmlns" : "xmlns:"
0483: + prefix;
0484:
0485: // Scan until we run out of Elements or have resolved the namespace
0486: while ((null != parent)
0487: && (null == namespace)
0488: && (((type = parent.getNodeType()) == Node.ELEMENT_NODE) || (type == Node.ENTITY_REFERENCE_NODE))) {
0489: if (type == Node.ELEMENT_NODE) {
0490:
0491: // Look for the appropriate Namespace Declaration attribute,
0492: // either "xmlns:prefix" or (if prefix is "") "xmlns".
0493: // TODO: This does not handle "implicit declarations"
0494: // which may be created when the DOM is edited. DOM Level
0495: // 3 will define how those should be interpreted. But
0496: // this issue won't arise in freshly-parsed DOMs.
0497:
0498: // NOTE: declname is set earlier, outside the loop.
0499: Attr attr = ((Element) parent)
0500: .getAttributeNode(declname);
0501: if (attr != null) {
0502: namespace = attr.getNodeValue();
0503: break;
0504: }
0505: }
0506:
0507: parent = getParentOfNode(parent);
0508: }
0509: }
0510:
0511: return namespace;
0512: }
0513:
0514: /**
0515: * An experiment for the moment.
0516: */
0517: Hashtable m_NSInfos = new Hashtable();
0518:
0519: /** Object to put into the m_NSInfos table that tells that a node has not been
0520: * processed, but has xmlns namespace decls. */
0521: protected static final NSInfo m_NSInfoUnProcWithXMLNS = new NSInfo(
0522: false, true);
0523:
0524: /** Object to put into the m_NSInfos table that tells that a node has not been
0525: * processed, but has no xmlns namespace decls. */
0526: protected static final NSInfo m_NSInfoUnProcWithoutXMLNS = new NSInfo(
0527: false, false);
0528:
0529: /** Object to put into the m_NSInfos table that tells that a node has not been
0530: * processed, and has no xmlns namespace decls, and has no ancestor decls. */
0531: protected static final NSInfo m_NSInfoUnProcNoAncestorXMLNS = new NSInfo(
0532: false, false, NSInfo.ANCESTORNOXMLNS);
0533:
0534: /** Object to put into the m_NSInfos table that tells that a node has been
0535: * processed, and has xmlns namespace decls. */
0536: protected static final NSInfo m_NSInfoNullWithXMLNS = new NSInfo(
0537: true, true);
0538:
0539: /** Object to put into the m_NSInfos table that tells that a node has been
0540: * processed, and has no xmlns namespace decls. */
0541: protected static final NSInfo m_NSInfoNullWithoutXMLNS = new NSInfo(
0542: true, false);
0543:
0544: /** Object to put into the m_NSInfos table that tells that a node has been
0545: * processed, and has no xmlns namespace decls. and has no ancestor decls. */
0546: protected static final NSInfo m_NSInfoNullNoAncestorXMLNS = new NSInfo(
0547: true, false, NSInfo.ANCESTORNOXMLNS);
0548:
0549: /** Vector of node (odd indexes) and NSInfos (even indexes) that tell if
0550: * the given node is a candidate for ancestor namespace processing. */
0551: protected Vector m_candidateNoAncestorXMLNS = new Vector();
0552:
0553: /**
0554: * Returns the namespace of the given node. Differs from simply getting
0555: * the node's prefix and using getNamespaceForPrefix in that it attempts
0556: * to cache some of the data in NSINFO objects, to avoid repeated lookup.
0557: * TODO: Should we consider moving that logic into getNamespaceForPrefix?
0558: *
0559: * @param n Node to be examined.
0560: *
0561: * @return String containing the Namespace Name (uri) for this node.
0562: * Note that this is undefined for any nodes other than Elements and
0563: * Attributes.
0564: */
0565: public String getNamespaceOfNode(Node n) {
0566:
0567: String namespaceOfPrefix;
0568: boolean hasProcessedNS;
0569: NSInfo nsInfo;
0570: short ntype = n.getNodeType();
0571:
0572: if (Node.ATTRIBUTE_NODE != ntype) {
0573: Object nsObj = m_NSInfos.get(n); // return value
0574:
0575: nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
0576: hasProcessedNS = (nsInfo == null) ? false
0577: : nsInfo.m_hasProcessedNS;
0578: } else {
0579: hasProcessedNS = false;
0580: nsInfo = null;
0581: }
0582:
0583: if (hasProcessedNS) {
0584: namespaceOfPrefix = nsInfo.m_namespace;
0585: } else {
0586: namespaceOfPrefix = null;
0587:
0588: String nodeName = n.getNodeName();
0589: int indexOfNSSep = nodeName.indexOf(':');
0590: String prefix;
0591:
0592: if (Node.ATTRIBUTE_NODE == ntype) {
0593: if (indexOfNSSep > 0) {
0594: prefix = nodeName.substring(0, indexOfNSSep);
0595: } else {
0596:
0597: // Attributes don't use the default namespace, so if
0598: // there isn't a prefix, we're done.
0599: return namespaceOfPrefix;
0600: }
0601: } else {
0602: prefix = (indexOfNSSep >= 0) ? nodeName.substring(0,
0603: indexOfNSSep) : "";
0604: }
0605:
0606: boolean ancestorsHaveXMLNS = false;
0607: boolean nHasXMLNS = false;
0608:
0609: if (prefix.equals("xml")) {
0610: namespaceOfPrefix = QName.S_XMLNAMESPACEURI;
0611: } else {
0612: int parentType;
0613: Node parent = n;
0614:
0615: while ((null != parent) && (null == namespaceOfPrefix)) {
0616: if ((null != nsInfo)
0617: && (nsInfo.m_ancestorHasXMLNSAttrs == NSInfo.ANCESTORNOXMLNS)) {
0618: break;
0619: }
0620:
0621: parentType = parent.getNodeType();
0622:
0623: if ((null == nsInfo) || nsInfo.m_hasXMLNSAttrs) {
0624: boolean elementHasXMLNS = false;
0625:
0626: if (parentType == Node.ELEMENT_NODE) {
0627: NamedNodeMap nnm = parent.getAttributes();
0628:
0629: for (int i = 0; i < nnm.getLength(); i++) {
0630: Node attr = nnm.item(i);
0631: String aname = attr.getNodeName();
0632:
0633: if (aname.charAt(0) == 'x') {
0634: boolean isPrefix = aname
0635: .startsWith("xmlns:");
0636:
0637: if (aname.equals("xmlns")
0638: || isPrefix) {
0639: if (n == parent)
0640: nHasXMLNS = true;
0641:
0642: elementHasXMLNS = true;
0643: ancestorsHaveXMLNS = true;
0644:
0645: String p = isPrefix ? aname
0646: .substring(6) : "";
0647:
0648: if (p.equals(prefix)) {
0649: namespaceOfPrefix = attr
0650: .getNodeValue();
0651:
0652: break;
0653: }
0654: }
0655: }
0656: }
0657: }
0658:
0659: if ((Node.ATTRIBUTE_NODE != parentType)
0660: && (null == nsInfo) && (n != parent)) {
0661: nsInfo = elementHasXMLNS ? m_NSInfoUnProcWithXMLNS
0662: : m_NSInfoUnProcWithoutXMLNS;
0663:
0664: m_NSInfos.put(parent, nsInfo);
0665: }
0666: }
0667:
0668: if (Node.ATTRIBUTE_NODE == parentType) {
0669: parent = getParentOfNode(parent);
0670: } else {
0671: m_candidateNoAncestorXMLNS.addElement(parent);
0672: m_candidateNoAncestorXMLNS.addElement(nsInfo);
0673:
0674: parent = parent.getParentNode();
0675: }
0676:
0677: if (null != parent) {
0678: Object nsObj = m_NSInfos.get(parent); // return value
0679:
0680: nsInfo = (nsObj == null) ? null
0681: : (NSInfo) nsObj;
0682: }
0683: }
0684:
0685: int nCandidates = m_candidateNoAncestorXMLNS.size();
0686:
0687: if (nCandidates > 0) {
0688: if ((false == ancestorsHaveXMLNS)
0689: && (null == parent)) {
0690: for (int i = 0; i < nCandidates; i += 2) {
0691: Object candidateInfo = m_candidateNoAncestorXMLNS
0692: .elementAt(i + 1);
0693:
0694: if (candidateInfo == m_NSInfoUnProcWithoutXMLNS) {
0695: m_NSInfos.put(
0696: m_candidateNoAncestorXMLNS
0697: .elementAt(i),
0698: m_NSInfoUnProcNoAncestorXMLNS);
0699: } else if (candidateInfo == m_NSInfoNullWithoutXMLNS) {
0700: m_NSInfos.put(
0701: m_candidateNoAncestorXMLNS
0702: .elementAt(i),
0703: m_NSInfoNullNoAncestorXMLNS);
0704: }
0705: }
0706: }
0707:
0708: m_candidateNoAncestorXMLNS.removeAllElements();
0709: }
0710: }
0711:
0712: if (Node.ATTRIBUTE_NODE != ntype) {
0713: if (null == namespaceOfPrefix) {
0714: if (ancestorsHaveXMLNS) {
0715: if (nHasXMLNS)
0716: m_NSInfos.put(n, m_NSInfoNullWithXMLNS);
0717: else
0718: m_NSInfos.put(n, m_NSInfoNullWithoutXMLNS);
0719: } else {
0720: m_NSInfos.put(n, m_NSInfoNullNoAncestorXMLNS);
0721: }
0722: } else {
0723: m_NSInfos.put(n, new NSInfo(namespaceOfPrefix,
0724: nHasXMLNS));
0725: }
0726: }
0727: }
0728:
0729: return namespaceOfPrefix;
0730: }
0731:
0732: /**
0733: * Returns the local name of the given node. If the node's name begins
0734: * with a namespace prefix, this is the part after the colon; otherwise
0735: * it's the full node name.
0736: *
0737: * @param n the node to be examined.
0738: *
0739: * @return String containing the Local Name
0740: */
0741: public String getLocalNameOfNode(Node n) {
0742:
0743: String qname = n.getNodeName();
0744: int index = qname.indexOf(':');
0745:
0746: return (index < 0) ? qname : qname.substring(index + 1);
0747: }
0748:
0749: /**
0750: * Returns the element name with the namespace prefix (if any) replaced
0751: * by the Namespace URI it was bound to. This is not a standard
0752: * representation of a node name, but it allows convenient
0753: * single-string comparison of the "universal" names of two nodes.
0754: *
0755: * @param elem Element to be examined.
0756: *
0757: * @return String in the form "namespaceURI:localname" if the node
0758: * belongs to a namespace, or simply "localname" if it doesn't.
0759: * @see #getExpandedAttributeName
0760: */
0761: public String getExpandedElementName(Element elem) {
0762:
0763: String namespace = getNamespaceOfNode(elem);
0764:
0765: return (null != namespace) ? namespace + ":"
0766: + getLocalNameOfNode(elem) : getLocalNameOfNode(elem);
0767: }
0768:
0769: /**
0770: * Returns the attribute name with the namespace prefix (if any) replaced
0771: * by the Namespace URI it was bound to. This is not a standard
0772: * representation of a node name, but it allows convenient
0773: * single-string comparison of the "universal" names of two nodes.
0774: *
0775: * @param attr Attr to be examined
0776: *
0777: * @return String in the form "namespaceURI:localname" if the node
0778: * belongs to a namespace, or simply "localname" if it doesn't.
0779: * @see #getExpandedElementName
0780: */
0781: public String getExpandedAttributeName(Attr attr) {
0782:
0783: String namespace = getNamespaceOfNode(attr);
0784:
0785: return (null != namespace) ? namespace + ":"
0786: + getLocalNameOfNode(attr) : getLocalNameOfNode(attr);
0787: }
0788:
0789: //==========================================================
0790: // SECTION: DOM Helper Functions
0791: //==========================================================
0792:
0793: /**
0794: * Tell if the node is ignorable whitespace. Note that this can
0795: * be determined only in the context of a DTD or other Schema,
0796: * and that DOM Level 2 has nostandardized DOM API which can
0797: * return that information.
0798: * @deprecated
0799: *
0800: * @param node Node to be examined
0801: *
0802: * @return CURRENTLY HARDCODED TO FALSE, but should return true if
0803: * and only if the node is of type Text, contains only whitespace,
0804: * and does not appear as part of the #PCDATA content of an element.
0805: * (Note that determining this last may require allowing for
0806: * Entity References.)
0807: */
0808: public boolean isIgnorableWhitespace(Text node) {
0809:
0810: boolean isIgnorable = false; // return value
0811:
0812: // TODO: I can probably do something to figure out if this
0813: // space is ignorable from just the information in
0814: // the DOM tree.
0815: // -- You need to be able to distinguish whitespace
0816: // that is #PCDATA from whitespace that isn't. That requires
0817: // DTD support, which won't be standardized until DOM Level 3.
0818: return isIgnorable;
0819: }
0820:
0821: /**
0822: * Get the first unparented node in the ancestor chain.
0823: * @deprecated
0824: *
0825: * @param node Starting node, to specify which chain to chase
0826: *
0827: * @return the topmost ancestor.
0828: */
0829: public Node getRoot(Node node) {
0830:
0831: Node root = null;
0832:
0833: while (node != null) {
0834: root = node;
0835: node = getParentOfNode(node);
0836: }
0837:
0838: return root;
0839: }
0840:
0841: /**
0842: * Get the root node of the document tree, regardless of
0843: * whether or not the node passed in is a document node.
0844: * <p>
0845: * TODO: This doesn't handle DocumentFragments or "orphaned" subtrees
0846: * -- it's currently returning ownerDocument even when the tree is
0847: * not actually part of the main Document tree. We should either
0848: * rewrite the description to say that it finds the Document node,
0849: * or change the code to walk up the ancestor chain.
0850:
0851: *
0852: * @param n Node to be examined
0853: *
0854: * @return the Document node. Note that this is not the correct answer
0855: * if n was (or was a child of) a DocumentFragment or an orphaned node,
0856: * as can arise if the DOM has been edited rather than being generated
0857: * by a parser.
0858: */
0859: public Node getRootNode(Node n) {
0860: int nt = n.getNodeType();
0861: return ((Node.DOCUMENT_NODE == nt) || (Node.DOCUMENT_FRAGMENT_NODE == nt)) ? n
0862: : n.getOwnerDocument();
0863: }
0864:
0865: /**
0866: * Test whether the given node is a namespace decl node. In DOM Level 2
0867: * this can be done in a namespace-aware manner, but in Level 1 DOMs
0868: * it has to be done by testing the node name.
0869: *
0870: * @param n Node to be examined.
0871: *
0872: * @return boolean -- true iff the node is an Attr whose name is
0873: * "xmlns" or has the "xmlns:" prefix.
0874: */
0875: public boolean isNamespaceNode(Node n) {
0876:
0877: if (Node.ATTRIBUTE_NODE == n.getNodeType()) {
0878: String attrName = n.getNodeName();
0879:
0880: return (attrName.startsWith("xmlns:") || attrName
0881: .equals("xmlns"));
0882: }
0883:
0884: return false;
0885: }
0886:
0887: /**
0888: * Obtain the XPath-model parent of a DOM node -- ownerElement for Attrs,
0889: * parent for other nodes.
0890: * <p>
0891: * Background: The DOM believes that you must be your Parent's
0892: * Child, and thus Attrs don't have parents. XPath said that Attrs
0893: * do have their owning Element as their parent. This function
0894: * bridges the difference, either by using the DOM Level 2 ownerElement
0895: * function or by using a "silly and expensive function" in Level 1
0896: * DOMs.
0897: * <p>
0898: * (There's some discussion of future DOMs generalizing ownerElement
0899: * into ownerNode and making it work on all types of nodes. This
0900: * still wouldn't help the users of Level 1 or Level 2 DOMs)
0901: * <p>
0902: *
0903: * @param node Node whose XPath parent we want to obtain
0904: *
0905: * @return the parent of the node, or the ownerElement if it's an
0906: * Attr node, or null if the node is an orphan.
0907: *
0908: * @throws RuntimeException if the Document has no root element.
0909: * This can't arise if the Document was created
0910: * via the DOM Level 2 factory methods, but is possible if other
0911: * mechanisms were used to obtain it
0912: */
0913: public static Node getParentOfNode(Node node)
0914: throws RuntimeException {
0915: Node parent;
0916: short nodeType = node.getNodeType();
0917:
0918: if (Node.ATTRIBUTE_NODE == nodeType) {
0919: Document doc = node.getOwnerDocument();
0920: /*
0921: TBD:
0922: if(null == doc)
0923: {
0924: throw new RuntimeException(XSLMessages.createXPATHMessage(XPATHErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT, null));//"Attribute child does not have an owner document!");
0925: }
0926: */
0927:
0928: // Given how expensive the tree walk may be, we should first ask
0929: // whether this DOM can answer the question for us. The additional
0930: // test does slow down Level 1 DOMs slightly. DOMHelper2, which
0931: // is currently specialized for Xerces, assumes it can use the
0932: // Level 2 solution. We might want to have an intermediate stage,
0933: // which would assume DOM Level 2 but not assume Xerces.
0934: //
0935: // (Shouldn't have to check whether impl is null in a compliant DOM,
0936: // but let's be paranoid for a moment...)
0937: DOMImplementation impl = doc.getImplementation();
0938: if (impl != null && impl.hasFeature("Core", "2.0")) {
0939: parent = ((Attr) node).getOwnerElement();
0940: return parent;
0941: }
0942:
0943: // DOM Level 1 solution, as fallback. Hugely expensive.
0944:
0945: Element rootElem = doc.getDocumentElement();
0946:
0947: if (null == rootElem) {
0948: throw new RuntimeException(
0949: XMLMessages
0950: .createXMLMessage(
0951: XMLErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT_ELEMENT,
0952: null)); //"Attribute child does not have an owner document element!");
0953: }
0954:
0955: parent = locateAttrParent(rootElem, node);
0956:
0957: } else {
0958: parent = node.getParentNode();
0959:
0960: // if((Node.DOCUMENT_NODE != nodeType) && (null == parent))
0961: // {
0962: // throw new RuntimeException("Child does not have parent!");
0963: // }
0964: }
0965:
0966: return parent;
0967: }
0968:
0969: /**
0970: * Given an ID, return the element. This can work only if the document
0971: * is interpreted in the context of a DTD or Schema, since otherwise
0972: * we don't know which attributes are or aren't IDs.
0973: * <p>
0974: * Note that DOM Level 1 had no ability to retrieve this information.
0975: * DOM Level 2 introduced it but does not promise that it will be
0976: * supported in all DOMs; those which can't support it will always
0977: * return null.
0978: * <p>
0979: * TODO: getElementByID is currently unimplemented. Support DOM Level 2?
0980: *
0981: * @param id The unique identifier to be searched for.
0982: * @param doc The document to search within.
0983: * @return CURRENTLY HARDCODED TO NULL, but it should be:
0984: * The node which has this unique identifier, or null if there
0985: * is no such node or this DOM can't reliably recognize it.
0986: */
0987: public Element getElementByID(String id, Document doc) {
0988: return null;
0989: }
0990:
0991: /**
0992: * The getUnparsedEntityURI function returns the URI of the unparsed
0993: * entity with the specified name in the same document as the context
0994: * node (see [3.3 Unparsed Entities]). It returns the empty string if
0995: * there is no such entity.
0996: * <p>
0997: * XML processors may choose to use the System Identifier (if one
0998: * is provided) to resolve the entity, rather than the URI in the
0999: * Public Identifier. The details are dependent on the processor, and
1000: * we would have to support some form of plug-in resolver to handle
1001: * this properly. Currently, we simply return the System Identifier if
1002: * present, and hope that it a usable URI or that our caller can
1003: * map it to one.
1004: * TODO: Resolve Public Identifiers... or consider changing function name.
1005: * <p>
1006: * If we find a relative URI
1007: * reference, XML expects it to be resolved in terms of the base URI
1008: * of the document. The DOM doesn't do that for us, and it isn't
1009: * entirely clear whether that should be done here; currently that's
1010: * pushed up to a higher levelof our application. (Note that DOM Level
1011: * 1 didn't store the document's base URI.)
1012: * TODO: Consider resolving Relative URIs.
1013: * <p>
1014: * (The DOM's statement that "An XML processor may choose to
1015: * completely expand entities before the structure model is passed
1016: * to the DOM" refers only to parsed entities, not unparsed, and hence
1017: * doesn't affect this function.)
1018: *
1019: * @param name A string containing the Entity Name of the unparsed
1020: * entity.
1021: * @param doc Document node for the document to be searched.
1022: *
1023: * @return String containing the URI of the Unparsed Entity, or an
1024: * empty string if no such entity exists.
1025: */
1026: public String getUnparsedEntityURI(String name, Document doc) {
1027:
1028: String url = "";
1029: DocumentType doctype = doc.getDoctype();
1030:
1031: if (null != doctype) {
1032: NamedNodeMap entities = doctype.getEntities();
1033: if (null == entities)
1034: return url;
1035: Entity entity = (Entity) entities.getNamedItem(name);
1036: if (null == entity)
1037: return url;
1038:
1039: String notationName = entity.getNotationName();
1040:
1041: if (null != notationName) // then it's unparsed
1042: {
1043: // The draft says: "The XSLT processor may use the public
1044: // identifier to generate a URI for the entity instead of the URI
1045: // specified in the system identifier. If the XSLT processor does
1046: // not use the public identifier to generate the URI, it must use
1047: // the system identifier; if the system identifier is a relative
1048: // URI, it must be resolved into an absolute URI using the URI of
1049: // the resource containing the entity declaration as the base
1050: // URI [RFC2396]."
1051: // So I'm falling a bit short here.
1052: url = entity.getSystemId();
1053:
1054: if (null == url) {
1055: url = entity.getPublicId();
1056: } else {
1057: // This should be resolved to an absolute URL, but that's hard
1058: // to do from here.
1059: }
1060: }
1061: }
1062:
1063: return url;
1064: }
1065:
1066: /**
1067: * Support for getParentOfNode; walks a DOM tree until it finds
1068: * the Element which owns the Attr. This is hugely expensive, and
1069: * if at all possible you should use the DOM Level 2 Attr.ownerElement()
1070: * method instead.
1071: * <p>
1072: * The DOM Level 1 developers expected that folks would keep track
1073: * of the last Element they'd seen and could recover the info from
1074: * that source. Obviously that doesn't work very well if the only
1075: * information you've been presented with is the Attr. The DOM Level 2
1076: * getOwnerElement() method fixes that, but only for Level 2 and
1077: * later DOMs.
1078: *
1079: * @param elem Element whose subtree is to be searched for this Attr
1080: * @param attr Attr whose owner is to be located.
1081: *
1082: * @return the first Element whose attribute list includes the provided
1083: * attr. In modern DOMs, this will also be the only such Element. (Early
1084: * DOMs had some hope that Attrs might be sharable, but this idea has
1085: * been abandoned.)
1086: */
1087: private static Node locateAttrParent(Element elem, Node attr) {
1088:
1089: Node parent = null;
1090:
1091: // This should only be called for Level 1 DOMs, so we don't have to
1092: // worry about namespace issues. In later levels, it's possible
1093: // for a DOM to have two Attrs with the same NodeName but
1094: // different namespaces, and we'd need to get getAttributeNodeNS...
1095: // but later levels also have Attr.getOwnerElement.
1096: Attr check = elem.getAttributeNode(attr.getNodeName());
1097: if (check == attr)
1098: parent = elem;
1099:
1100: if (null == parent) {
1101: for (Node node = elem.getFirstChild(); null != node; node = node
1102: .getNextSibling()) {
1103: if (Node.ELEMENT_NODE == node.getNodeType()) {
1104: parent = locateAttrParent((Element) node, attr);
1105:
1106: if (null != parent)
1107: break;
1108: }
1109: }
1110: }
1111:
1112: return parent;
1113: }
1114:
1115: /**
1116: * The factory object used for creating nodes
1117: * in the result tree.
1118: */
1119: protected Document m_DOMFactory = null;
1120:
1121: /**
1122: * Store the factory object required to create DOM nodes
1123: * in the result tree. In fact, that's just the result tree's
1124: * Document node...
1125: *
1126: * @param domFactory The DOM Document Node within whose context
1127: * the result tree will be built.
1128: */
1129: public void setDOMFactory(Document domFactory) {
1130: this .m_DOMFactory = domFactory;
1131: }
1132:
1133: /**
1134: * Retrieve the factory object required to create DOM nodes
1135: * in the result tree.
1136: *
1137: * @return The result tree's DOM Document Node.
1138: */
1139: public Document getDOMFactory() {
1140:
1141: if (null == this .m_DOMFactory) {
1142: this .m_DOMFactory = createDocument();
1143: }
1144:
1145: return this .m_DOMFactory;
1146: }
1147:
1148: /**
1149: * Get the textual contents of the node. See
1150: * getNodeData(Node,FastStringBuffer) for discussion of how
1151: * whitespace nodes are handled.
1152: *
1153: * @param node DOM Node to be examined
1154: * @return String containing a concatenation of all the
1155: * textual content within that node.
1156: * @see #getNodeData(Node,FastStringBuffer)
1157: *
1158: */
1159: public static String getNodeData(Node node) {
1160:
1161: FastStringBuffer buf = StringBufferPool.get();
1162: String s;
1163:
1164: try {
1165: getNodeData(node, buf);
1166:
1167: s = (buf.length() > 0) ? buf.toString() : "";
1168: } finally {
1169: StringBufferPool.free(buf);
1170: }
1171:
1172: return s;
1173: }
1174:
1175: /**
1176: * Retrieve the text content of a DOM subtree, appending it into a
1177: * user-supplied FastStringBuffer object. Note that attributes are
1178: * not considered part of the content of an element.
1179: * <p>
1180: * There are open questions regarding whitespace stripping.
1181: * Currently we make no special effort in that regard, since the standard
1182: * DOM doesn't yet provide DTD-based information to distinguish
1183: * whitespace-in-element-context from genuine #PCDATA. Note that we
1184: * should probably also consider xml:space if/when we address this.
1185: * DOM Level 3 may solve the problem for us.
1186: *
1187: * @param node Node whose subtree is to be walked, gathering the
1188: * contents of all Text or CDATASection nodes.
1189: * @param buf FastStringBuffer into which the contents of the text
1190: * nodes are to be concatenated.
1191: */
1192: public static void getNodeData(Node node, FastStringBuffer buf) {
1193:
1194: switch (node.getNodeType()) {
1195: case Node.DOCUMENT_FRAGMENT_NODE:
1196: case Node.DOCUMENT_NODE:
1197: case Node.ELEMENT_NODE: {
1198: for (Node child = node.getFirstChild(); null != child; child = child
1199: .getNextSibling()) {
1200: getNodeData(child, buf);
1201: }
1202: }
1203: break;
1204: case Node.TEXT_NODE:
1205: case Node.CDATA_SECTION_NODE:
1206: buf.append(node.getNodeValue());
1207: break;
1208: case Node.ATTRIBUTE_NODE:
1209: buf.append(node.getNodeValue());
1210: break;
1211: case Node.PROCESSING_INSTRUCTION_NODE:
1212: // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1213: break;
1214: default:
1215: // ignore
1216: break;
1217: }
1218: }
1219: }
|