0001: /*
0002: * Copyright 1999-2004 The Apache Software Foundation.
0003: *
0004: * Licensed under the Apache License, Version 2.0 (the "License");
0005: * you may not use this file except in compliance with the License.
0006: * You may obtain a copy of the License at
0007: *
0008: * http://www.apache.org/licenses/LICENSE-2.0
0009: *
0010: * Unless required by applicable law or agreed to in writing, software
0011: * distributed under the License is distributed on an "AS IS" BASIS,
0012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013: * See the License for the specific language governing permissions and
0014: * limitations under the License.
0015: */
0016: /*
0017: * $Id: DOM2DTM.java,v 1.35 2005/01/24 00:34:36 mcnamara Exp $
0018: */
0019: package org.apache.xml.dtm.ref.dom2dtm;
0020:
0021: import java.util.Vector;
0022:
0023: import javax.xml.transform.SourceLocator;
0024: import javax.xml.transform.dom.DOMSource;
0025:
0026: import org.apache.xml.dtm.DTM;
0027: import org.apache.xml.dtm.DTMManager;
0028: import org.apache.xml.dtm.DTMWSFilter;
0029: import org.apache.xml.dtm.ref.DTMDefaultBaseIterators;
0030: import org.apache.xml.dtm.ref.DTMManagerDefault;
0031: import org.apache.xml.dtm.ref.ExpandedNameTable;
0032: import org.apache.xml.dtm.ref.IncrementalSAXSource;
0033: import org.apache.xml.res.XMLErrorResources;
0034: import org.apache.xml.res.XMLMessages;
0035: import org.apache.xml.utils.FastStringBuffer;
0036: import org.apache.xml.utils.QName;
0037: import org.apache.xml.utils.StringBufferPool;
0038: import org.apache.xml.utils.TreeWalker;
0039: import org.apache.xml.utils.XMLCharacterRecognizer;
0040: import org.apache.xml.utils.XMLString;
0041: import org.apache.xml.utils.XMLStringFactory;
0042: import org.w3c.dom.Attr;
0043: import org.w3c.dom.Document;
0044: import org.w3c.dom.DocumentType;
0045: import org.w3c.dom.Element;
0046: import org.w3c.dom.Entity;
0047: import org.w3c.dom.NamedNodeMap;
0048: import org.w3c.dom.Node;
0049: import org.xml.sax.ContentHandler;
0050:
/**
 * The <code>DOM2DTM</code> class serves up a DOM's contents via the
 * DTM API.
 *
 * Note that it doesn't necessarily represent a full Document
 * tree. You can wrap a DOM2DTM around a specific node and its subtree
 * and the right things should happen. (I don't _think_ we currently
 * support DocumentFragment nodes as roots, though that might be worth
 * considering.)
 *
 * Note too that we do not currently attempt to track document
 * mutation. If you alter the DOM after wrapping DOM2DTM around it,
 * all bets are off.
 */
0064: public class DOM2DTM extends DTMDefaultBaseIterators {
0065: static final boolean JJK_DEBUG = false;
0066: static final boolean JJK_NEWCODE = true;
0067:
0068: /** Manefest constant
0069: */
0070: static final String NAMESPACE_DECL_NS = "http://www.w3.org/XML/1998/namespace";
0071:
0072: /** The current position in the DOM tree. Last node examined for
0073: * possible copying to DTM. */
0074: transient private Node m_pos;
0075: /** The current position in the DTM tree. Who children get appended to. */
0076: private int m_last_parent = 0;
0077: /** The current position in the DTM tree. Who children reference as their
0078: * previous sib. */
0079: private int m_last_kid = NULL;
0080:
0081: /** The top of the subtree.
0082: * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
0083: * */
0084: transient private Node m_root;
0085:
0086: /** True iff the first element has been processed. This is used to control
0087: synthesis of the implied xml: namespace declaration node. */
0088: boolean m_processedFirstElement = false;
0089:
0090: /** true if ALL the nodes in the m_root subtree have been processed;
0091: * false if our incremental build has not yet finished scanning the
0092: * DOM tree. */
0093: transient private boolean m_nodesAreProcessed;
0094:
0095: /** The node objects. The instance part of the handle indexes
0096: * directly into this vector. Each DTM node may actually be
0097: * composed of several DOM nodes (for example, if logically-adjacent
0098: * Text/CDATASection nodes in the DOM have been coalesced into a
0099: * single DTM Text node); this table points only to the first in
0100: * that sequence. */
0101: protected Vector m_nodes = new Vector();
0102:
0103: /**
0104: * Construct a DOM2DTM object from a DOM node.
0105: *
0106: * @param mgr The DTMManager who owns this DTM.
0107: * @param domSource the DOM source that this DTM will wrap.
0108: * @param dtmIdentity The DTM identity ID for this DTM.
0109: * @param whiteSpaceFilter The white space filter for this DTM, which may
0110: * be null.
0111: * @param xstringfactory XMLString factory for creating character content.
0112: * @param doIndexing true if the caller considers it worth it to use
0113: * indexing schemes.
0114: */
0115: public DOM2DTM(DTMManager mgr, DOMSource domSource,
0116: int dtmIdentity, DTMWSFilter whiteSpaceFilter,
0117: XMLStringFactory xstringfactory, boolean doIndexing) {
0118: super (mgr, domSource, dtmIdentity, whiteSpaceFilter,
0119: xstringfactory, doIndexing);
0120:
0121: // Initialize DOM navigation
0122: m_pos = m_root = domSource.getNode();
0123: // Initialize DTM navigation
0124: m_last_parent = m_last_kid = NULL;
0125: m_last_kid = addNode(m_root, m_last_parent, m_last_kid, NULL);
0126:
0127: // Apparently the domSource root may not actually be the
0128: // Document node. If it's an Element node, we need to immediately
0129: // add its attributes. Adapted from nextNode().
0130: // %REVIEW% Move this logic into addNode and recurse? Cleaner!
0131: //
0132: // (If it's an EntityReference node, we're probably scrod. For now
0133: // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
0134: //
0135: // %ISSUE% What about inherited namespaces in this case?
0136: // Do we need to special-case initialize them into the DTM model?
0137: if (ELEMENT_NODE == m_root.getNodeType()) {
0138: NamedNodeMap attrs = m_root.getAttributes();
0139: int attrsize = (attrs == null) ? 0 : attrs.getLength();
0140: if (attrsize > 0) {
0141: int attrIndex = NULL; // start with no previous sib
0142: for (int i = 0; i < attrsize; ++i) {
0143: // No need to force nodetype in this case;
0144: // addNode() will take care of switching it from
0145: // Attr to Namespace if necessary.
0146: attrIndex = addNode(attrs.item(i), 0, attrIndex,
0147: NULL);
0148: m_firstch.setElementAt(DTM.NULL, attrIndex);
0149: }
0150: // Terminate list of attrs, and make sure they aren't
0151: // considered children of the element
0152: m_nextsib.setElementAt(DTM.NULL, attrIndex);
0153:
0154: // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
0155: } // if attrs exist
0156: } //if(ELEMENT_NODE)
0157:
0158: // Initialize DTM-completed status
0159: m_nodesAreProcessed = false;
0160: }
0161:
0162: /**
0163: * Construct the node map from the node.
0164: *
0165: * @param node The node that is to be added to the DTM.
0166: * @param parentIndex The current parent index.
0167: * @param previousSibling The previous sibling index.
0168: * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
0169: * Used to force nodes to Text rather than CDATASection when their
0170: * coalesced value includes ordinary Text nodes (current DTM behavior).
0171: *
0172: * @return The index identity of the node that was added.
0173: */
0174: protected int addNode(Node node, int parentIndex,
0175: int previousSibling, int forceNodeType) {
0176: int nodeIndex = m_nodes.size();
0177:
0178: // Have we overflowed a DTM Identity's addressing range?
0179: if (m_dtmIdent.size() == (nodeIndex >>> DTMManager.IDENT_DTM_NODE_BITS)) {
0180: try {
0181: if (m_mgr == null)
0182: throw new ClassCastException();
0183:
0184: // Handle as Extended Addressing
0185: DTMManagerDefault mgrD = (DTMManagerDefault) m_mgr;
0186: int id = mgrD.getFirstFreeDTMID();
0187: mgrD.addDTM(this , id, nodeIndex);
0188: m_dtmIdent
0189: .addElement(id << DTMManager.IDENT_DTM_NODE_BITS);
0190: } catch (ClassCastException e) {
0191: // %REVIEW% Wrong error message, but I've been told we're trying
0192: // not to add messages right not for I18N reasons.
0193: // %REVIEW% Should this be a Fatal Error?
0194: error(XMLMessages.createXMLMessage(
0195: XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
0196: }
0197: }
0198:
0199: m_size++;
0200: // ensureSize(nodeIndex);
0201:
0202: int type;
0203: if (NULL == forceNodeType)
0204: type = node.getNodeType();
0205: else
0206: type = forceNodeType;
0207:
0208: // %REVIEW% The Namespace Spec currently says that Namespaces are
0209: // processed in a non-namespace-aware manner, by matching the
0210: // QName, even though there is in fact a namespace assigned to
0211: // these nodes in the DOM. If and when that changes, we will have
0212: // to consider whether we check the namespace-for-namespaces
0213: // rather than the node name.
0214: //
0215: // %TBD% Note that the DOM does not necessarily explicitly declare
0216: // all the namespaces it uses. DOM Level 3 will introduce a
0217: // namespace-normalization operation which reconciles that, and we
0218: // can request that users invoke it or otherwise ensure that the
0219: // tree is namespace-well-formed before passing the DOM to Xalan.
0220: // But if they don't, what should we do about it? We probably
0221: // don't want to alter the source DOM (and may not be able to do
0222: // so if it's read-only). The best available answer might be to
0223: // synthesize additional DTM Namespace Nodes that don't correspond
0224: // to DOM Attr Nodes.
0225: if (Node.ATTRIBUTE_NODE == type) {
0226: String name = node.getNodeName();
0227:
0228: if (name.startsWith("xmlns:") || name.equals("xmlns")) {
0229: type = DTM.NAMESPACE_NODE;
0230: }
0231: }
0232:
0233: m_nodes.addElement(node);
0234:
0235: m_firstch.setElementAt(NOTPROCESSED, nodeIndex);
0236: m_nextsib.setElementAt(NOTPROCESSED, nodeIndex);
0237: m_prevsib.setElementAt(previousSibling, nodeIndex);
0238: m_parent.setElementAt(parentIndex, nodeIndex);
0239:
0240: if (DTM.NULL != parentIndex && type != DTM.ATTRIBUTE_NODE
0241: && type != DTM.NAMESPACE_NODE) {
0242: // If the DTM parent had no children, this becomes its first child.
0243: if (NOTPROCESSED == m_firstch.elementAt(parentIndex))
0244: m_firstch.setElementAt(nodeIndex, parentIndex);
0245: }
0246:
0247: String nsURI = node.getNamespaceURI();
0248:
0249: // Deal with the difference between Namespace spec and XSLT
0250: // definitions of local name. (The former says PIs don't have
0251: // localnames; the latter says they do.)
0252: String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ? node
0253: .getNodeName()
0254: : node.getLocalName();
0255:
0256: // Hack to make DOM1 sort of work...
0257: if (((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
0258: && null == localName)
0259: localName = node.getNodeName(); // -sb
0260:
0261: ExpandedNameTable exnt = m_expandedNameTable;
0262:
0263: // %TBD% Nodes created with the old non-namespace-aware DOM
0264: // calls createElement() and createAttribute() will never have a
0265: // localname. That will cause their expandedNameID to be just the
0266: // nodeType... which will keep them from being matched
0267: // successfully by name. Since the DOM makes no promise that
0268: // those will participate in namespace processing, this is
0269: // officially accepted as Not Our Fault. But it might be nice to
0270: // issue a diagnostic message!
0271: if (node.getLocalName() == null
0272: && (type == Node.ELEMENT_NODE || type == Node.ATTRIBUTE_NODE)) {
0273: // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
0274: }
0275:
0276: int expandedNameID = (null != localName) ? exnt
0277: .getExpandedTypeID(nsURI, localName, type) : exnt
0278: .getExpandedTypeID(type);
0279:
0280: m_exptype.setElementAt(expandedNameID, nodeIndex);
0281:
0282: indexNode(expandedNameID, nodeIndex);
0283:
0284: if (DTM.NULL != previousSibling)
0285: m_nextsib.setElementAt(nodeIndex, previousSibling);
0286:
0287: // This should be done after m_exptype has been set, and probably should
0288: // always be the last thing we do
0289: if (type == DTM.NAMESPACE_NODE)
0290: declareNamespaceInContext(parentIndex, nodeIndex);
0291:
0292: return nodeIndex;
0293: }
0294:
0295: /**
0296: * Get the number of nodes that have been added.
0297: */
0298: public int getNumberOfNodes() {
0299: return m_nodes.size();
0300: }
0301:
0302: /**
0303: * This method iterates to the next node that will be added to the table.
0304: * Each call to this method adds a new node to the table, unless the end
0305: * is reached, in which case it returns null.
0306: *
0307: * @return The true if a next node is found or false if
0308: * there are no more nodes.
0309: */
0310: protected boolean nextNode() {
0311: // Non-recursive one-fetch-at-a-time depth-first traversal with
0312: // attribute/namespace nodes and white-space stripping.
0313: // Navigating the DOM is simple, navigating the DTM is simple;
0314: // keeping track of both at once is a trifle baroque but at least
0315: // we've avoided most of the special cases.
0316: if (m_nodesAreProcessed)
0317: return false;
0318:
0319: // %REVIEW% Is this local copy Really Useful from a performance
0320: // point of view? Or is this a false microoptimization?
0321: Node pos = m_pos;
0322: Node next = null;
0323: int nexttype = NULL;
0324:
0325: // Navigate DOM tree
0326: do {
0327: // Look down to first child.
0328: if (pos.hasChildNodes()) {
0329: next = pos.getFirstChild();
0330:
0331: // %REVIEW% There's probably a more elegant way to skip
0332: // the doctype. (Just let it go and Suppress it?
0333: if (next != null
0334: && DOCUMENT_TYPE_NODE == next.getNodeType())
0335: next = next.getNextSibling();
0336:
0337: // Push DTM context -- except for children of Entity References,
0338: // which have no DTM equivalent and cause no DTM navigation.
0339: if (ENTITY_REFERENCE_NODE != pos.getNodeType()) {
0340: m_last_parent = m_last_kid;
0341: m_last_kid = NULL;
0342: // Whitespace-handler context stacking
0343: if (null != m_wsfilter) {
0344: short wsv = m_wsfilter.getShouldStripSpace(
0345: makeNodeHandle(m_last_parent), this );
0346: boolean shouldStrip = (DTMWSFilter.INHERIT == wsv) ? getShouldStripWhitespace()
0347: : (DTMWSFilter.STRIP == wsv);
0348: pushShouldStripWhitespace(shouldStrip);
0349: } // if(m_wsfilter)
0350: }
0351: }
0352:
0353: // If that fails, look up and right (but not past root!)
0354: else {
0355: if (m_last_kid != NULL) {
0356: // Last node posted at this level had no more children
0357: // If it has _no_ children, we need to record that.
0358: if (m_firstch.elementAt(m_last_kid) == NOTPROCESSED)
0359: m_firstch.setElementAt(NULL, m_last_kid);
0360: }
0361:
0362: while (m_last_parent != NULL) {
0363: // %REVIEW% There's probably a more elegant way to
0364: // skip the doctype. (Just let it go and Suppress it?
0365: next = pos.getNextSibling();
0366: if (next != null
0367: && DOCUMENT_TYPE_NODE == next.getNodeType())
0368: next = next.getNextSibling();
0369:
0370: if (next != null)
0371: break; // Found it!
0372:
0373: // No next-sibling found. Pop the DOM.
0374: pos = pos.getParentNode();
0375: if (pos == null) {
0376: // %TBD% Should never arise, but I want to be sure of that...
0377: if (JJK_DEBUG) {
0378: System.out
0379: .println("***** DOM2DTM Pop Control Flow problem");
0380: for (;;)
0381: ; // Freeze right here!
0382: }
0383: }
0384:
0385: // The only parents in the DTM are Elements. However,
0386: // the DOM could contain EntityReferences. If we
0387: // encounter one, pop it _without_ popping DTM.
0388: if (pos != null
0389: && ENTITY_REFERENCE_NODE == pos
0390: .getNodeType()) {
0391: // Nothing needs doing
0392: if (JJK_DEBUG)
0393: System.out
0394: .println("***** DOM2DTM popping EntRef");
0395: } else {
0396: popShouldStripWhitespace();
0397: // Fix and pop DTM
0398: if (m_last_kid == NULL)
0399: m_firstch.setElementAt(NULL, m_last_parent); // Popping from an element
0400: else
0401: m_nextsib.setElementAt(NULL, m_last_kid); // Popping from anything else
0402: m_last_parent = m_parent
0403: .elementAt(m_last_kid = m_last_parent);
0404: }
0405: }
0406: if (m_last_parent == NULL)
0407: next = null;
0408: }
0409:
0410: if (next != null)
0411: nexttype = next.getNodeType();
0412:
0413: // If it's an entity ref, advance past it.
0414: //
0415: // %REVIEW% Should we let this out the door and just suppress it?
0416: // More work, but simpler code, more likely to be correct, and
0417: // it doesn't happen very often. We'd get rid of the loop too.
0418: if (ENTITY_REFERENCE_NODE == nexttype)
0419: pos = next;
0420: } while (ENTITY_REFERENCE_NODE == nexttype);
0421:
0422: // Did we run out of the tree?
0423: if (next == null) {
0424: m_nextsib.setElementAt(NULL, 0);
0425: m_nodesAreProcessed = true;
0426: m_pos = null;
0427:
0428: if (JJK_DEBUG) {
0429: System.out.println("***** DOM2DTM Crosscheck:");
0430: for (int i = 0; i < m_nodes.size(); ++i)
0431: System.out.println(i + ":\t"
0432: + m_firstch.elementAt(i) + "\t"
0433: + m_nextsib.elementAt(i));
0434: }
0435:
0436: return false;
0437: }
0438:
0439: // Text needs some special handling:
0440: //
0441: // DTM may skip whitespace. This is handled by the suppressNode flag, which
0442: // when true will keep the DTM node from being created.
0443: //
0444: // DTM only directly records the first DOM node of any logically-contiguous
0445: // sequence. The lastTextNode value will be set to the last node in the
0446: // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
0447: // advance next over this whole block. Should be simpler than special-casing
0448: // the above loop for "Was the logically-preceeding sibling a text node".
0449: //
0450: // Finally, a DTM node should be considered a CDATASection only if all the
0451: // contiguous text it covers is CDATASections. The first Text should
0452: // force DTM to Text.
0453:
0454: boolean suppressNode = false;
0455: Node lastTextNode = null;
0456:
0457: nexttype = next.getNodeType();
0458:
0459: // nexttype=pos.getNodeType();
0460: if (TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) {
0461: // If filtering, initially assume we're going to suppress the node
0462: suppressNode = ((null != m_wsfilter) && getShouldStripWhitespace());
0463:
0464: // Scan logically contiguous text (siblings, plus "flattening"
0465: // of entity reference boundaries).
0466: Node n = next;
0467: while (n != null) {
0468: lastTextNode = n;
0469: // Any Text node means DTM considers it all Text
0470: if (TEXT_NODE == n.getNodeType())
0471: nexttype = TEXT_NODE;
0472: // Any non-whitespace in this sequence blocks whitespace
0473: // suppression
0474: suppressNode &= XMLCharacterRecognizer.isWhiteSpace(n
0475: .getNodeValue());
0476:
0477: n = logicalNextDOMTextNode(n);
0478: }
0479: }
0480:
0481: // Special handling for PIs: Some DOMs represent the XML
0482: // Declaration as a PI. This is officially incorrect, per the DOM
0483: // spec, but is considered a "wrong but tolerable" temporary
0484: // workaround pending proper handling of these fields in DOM Level
0485: // 3. We want to recognize and reject that case.
0486: else if (PROCESSING_INSTRUCTION_NODE == nexttype) {
0487: suppressNode = (pos.getNodeName().toLowerCase()
0488: .equals("xml"));
0489: }
0490:
0491: if (!suppressNode) {
0492: // Inserting next. NOTE that we force the node type; for
0493: // coalesced Text, this records CDATASections adjacent to
0494: // ordinary Text as Text.
0495: int nextindex = addNode(next, m_last_parent, m_last_kid,
0496: nexttype);
0497:
0498: m_last_kid = nextindex;
0499:
0500: if (ELEMENT_NODE == nexttype) {
0501: int attrIndex = NULL; // start with no previous sib
0502: // Process attributes _now_, rather than waiting.
0503: // Simpler control flow, makes NS cache available immediately.
0504: NamedNodeMap attrs = next.getAttributes();
0505: int attrsize = (attrs == null) ? 0 : attrs.getLength();
0506: if (attrsize > 0) {
0507: for (int i = 0; i < attrsize; ++i) {
0508: // No need to force nodetype in this case;
0509: // addNode() will take care of switching it from
0510: // Attr to Namespace if necessary.
0511: attrIndex = addNode(attrs.item(i), nextindex,
0512: attrIndex, NULL);
0513: m_firstch.setElementAt(DTM.NULL, attrIndex);
0514:
0515: // If the xml: prefix is explicitly declared
0516: // we don't need to synthesize one.
0517: //
0518: // NOTE that XML Namespaces were not originally
0519: // defined as being namespace-aware (grrr), and
0520: // while the W3C is planning to fix this it's
0521: // safer for now to test the QName and trust the
0522: // parsers to prevent anyone from redefining the
0523: // reserved xmlns: prefix
0524: if (!m_processedFirstElement
0525: && "xmlns:xml".equals(attrs.item(i)
0526: .getNodeName()))
0527: m_processedFirstElement = true;
0528: }
0529: // Terminate list of attrs, and make sure they aren't
0530: // considered children of the element
0531: } // if attrs exist
0532: if (!m_processedFirstElement) {
0533: // The DOM might not have an explicit declaration for the
0534: // implicit "xml:" prefix, but the XPath data model
0535: // requires that this appear as a Namespace Node so we
0536: // have to synthesize one. You can think of this as
0537: // being a default attribute defined by the XML
0538: // Namespaces spec rather than by the DTD.
0539: attrIndex = addNode(
0540: new DOM2DTMdefaultNamespaceDeclarationNode(
0541: (Element) next,
0542: "xml",
0543: NAMESPACE_DECL_NS,
0544: makeNodeHandle(((attrIndex == NULL) ? nextindex
0545: : attrIndex) + 1)),
0546: nextindex, attrIndex, NULL);
0547: m_firstch.setElementAt(DTM.NULL, attrIndex);
0548: m_processedFirstElement = true;
0549: }
0550: if (attrIndex != NULL)
0551: m_nextsib.setElementAt(DTM.NULL, attrIndex);
0552: } //if(ELEMENT_NODE)
0553: } // (if !suppressNode)
0554:
0555: // Text postprocessing: Act on values stored above
0556: if (TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) {
0557: // %TBD% If nexttype was forced to TEXT, patch the DTM node
0558:
0559: next = lastTextNode; // Advance the DOM cursor over contiguous text
0560: }
0561:
0562: // Remember where we left off.
0563: m_pos = next;
0564: return true;
0565: }
0566:
0567: /**
0568: * Return an DOM node for the given node.
0569: *
0570: * @param nodeHandle The node ID.
0571: *
0572: * @return A node representation of the DTM node.
0573: */
0574: public Node getNode(int nodeHandle) {
0575:
0576: int identity = makeNodeIdentity(nodeHandle);
0577:
0578: return (Node) m_nodes.elementAt(identity);
0579: }
0580:
0581: /**
0582: * Get a Node from an identity index.
0583: *
0584: * NEEDSDOC @param nodeIdentity
0585: *
0586: * NEEDSDOC ($objectName$) @return
0587: */
0588: protected Node lookupNode(int nodeIdentity) {
0589: return (Node) m_nodes.elementAt(nodeIdentity);
0590: }
0591:
0592: /**
0593: * Get the next node identity value in the list, and call the iterator
0594: * if it hasn't been added yet.
0595: *
0596: * @param identity The node identity (index).
0597: * @return identity+1, or DTM.NULL.
0598: */
0599: protected int getNextNodeIdentity(int identity) {
0600:
0601: identity += 1;
0602:
0603: if (identity >= m_nodes.size()) {
0604: if (!nextNode())
0605: identity = DTM.NULL;
0606: }
0607:
0608: return identity;
0609: }
0610:
0611: /**
0612: * Get the handle from a Node.
0613: * <p>%OPT% This will be pretty slow.</p>
0614: *
0615: * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
0616: * walk down DTM reconstructing path) might be considerably faster
0617: * on later nodes in large documents. That might also imply improving
0618: * this call to handle nodes which would be in this DTM but
0619: * have not yet been built, which might or might not be a Good Thing.</p>
0620: *
0621: * %REVIEW% This relies on being able to test node-identity via
0622: * object-identity. DTM2DOM proxying is a great example of a case where
0623: * that doesn't work. DOM Level 3 will provide the isSameNode() method
0624: * to fix that, but until then this is going to be flaky.
0625: *
0626: * @param node A node, which may be null.
0627: *
0628: * @return The node handle or <code>DTM.NULL</code>.
0629: */
0630: private int getHandleFromNode(Node node) {
0631: if (null != node) {
0632: int len = m_nodes.size();
0633: boolean isMore;
0634: int i = 0;
0635: do {
0636: for (; i < len; i++) {
0637: if (m_nodes.elementAt(i) == node)
0638: return makeNodeHandle(i);
0639: }
0640:
0641: isMore = nextNode();
0642:
0643: len = m_nodes.size();
0644:
0645: } while (isMore || i < len);
0646: }
0647:
0648: return DTM.NULL;
0649: }
0650:
0651: /** Get the handle from a Node. This is a more robust version of
0652: * getHandleFromNode, intended to be usable by the public.
0653: *
0654: * <p>%OPT% This will be pretty slow.</p>
0655: *
0656: * %REVIEW% This relies on being able to test node-identity via
0657: * object-identity. DTM2DOM proxying is a great example of a case where
0658: * that doesn't work. DOM Level 3 will provide the isSameNode() method
0659: * to fix that, but until then this is going to be flaky.
0660: *
0661: * @param node A node, which may be null.
0662: *
0663: * @return The node handle or <code>DTM.NULL</code>. */
0664: public int getHandleOfNode(Node node) {
0665: if (null != node) {
0666: // Is Node actually within the same document? If not, don't search!
0667: // This would be easier if m_root was always the Document node, but
0668: // we decided to allow wrapping a DTM around a subtree.
0669: if ((m_root == node)
0670: || (m_root.getNodeType() == DOCUMENT_NODE && m_root == node
0671: .getOwnerDocument())
0672: || (m_root.getNodeType() != DOCUMENT_NODE && m_root
0673: .getOwnerDocument() == node
0674: .getOwnerDocument())) {
0675: // If node _is_ in m_root's tree, find its handle
0676: //
0677: // %OPT% This check may be improved significantly when DOM
0678: // Level 3 nodeKey and relative-order tests become
0679: // available!
0680: for (Node cursor = node; cursor != null; cursor = (cursor
0681: .getNodeType() != ATTRIBUTE_NODE) ? cursor
0682: .getParentNode() : ((org.w3c.dom.Attr) cursor)
0683: .getOwnerElement()) {
0684: if (cursor == m_root)
0685: // We know this node; find its handle.
0686: return getHandleFromNode(node);
0687: } // for ancestors of node
0688: } // if node and m_root in same Document
0689: } // if node!=null
0690:
0691: return DTM.NULL;
0692: }
0693:
0694: /**
0695: * Retrieves an attribute node by by qualified name and namespace URI.
0696: *
0697: * @param nodeHandle int Handle of the node upon which to look up this attribute..
0698: * @param namespaceURI The namespace URI of the attribute to
0699: * retrieve, or null.
0700: * @param name The local name of the attribute to
0701: * retrieve.
0702: * @return The attribute node handle with the specified name (
0703: * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
0704: * attribute.
0705: */
0706: public int getAttributeNode(int nodeHandle, String namespaceURI,
0707: String name) {
0708:
0709: // %OPT% This is probably slower than it needs to be.
0710: if (null == namespaceURI)
0711: namespaceURI = "";
0712:
0713: int type = getNodeType(nodeHandle);
0714:
0715: if (DTM.ELEMENT_NODE == type) {
0716:
0717: // Assume that attributes immediately follow the element.
0718: int identity = makeNodeIdentity(nodeHandle);
0719:
0720: while (DTM.NULL != (identity = getNextNodeIdentity(identity))) {
0721: // Assume this can not be null.
0722: type = _type(identity);
0723:
0724: // %REVIEW%
0725: // Should namespace nodes be retrievable DOM-style as attrs?
0726: // If not we need a separate function... which may be desirable
0727: // architecturally, but which is ugly from a code point of view.
0728: // (If we REALLY insist on it, this code should become a subroutine
0729: // of both -- retrieve the node, then test if the type matches
0730: // what you're looking for.)
0731: if (type == DTM.ATTRIBUTE_NODE
0732: || type == DTM.NAMESPACE_NODE) {
0733: Node node = lookupNode(identity);
0734: String nodeuri = node.getNamespaceURI();
0735:
0736: if (null == nodeuri)
0737: nodeuri = "";
0738:
0739: String nodelocalname = node.getLocalName();
0740:
0741: if (nodeuri.equals(namespaceURI)
0742: && name.equals(nodelocalname))
0743: return makeNodeHandle(identity);
0744: }
0745:
0746: else // if (DTM.NAMESPACE_NODE != type)
0747: {
0748: break;
0749: }
0750: }
0751: }
0752:
0753: return DTM.NULL;
0754: }
0755:
0756: /**
0757: * Get the string-value of a node as a String object
0758: * (see http://www.w3.org/TR/xpath#data-model
0759: * for the definition of a node's string-value).
0760: *
0761: * @param nodeHandle The node ID.
0762: *
0763: * @return A string object that represents the string-value of the given node.
0764: */
0765: public XMLString getStringValue(int nodeHandle) {
0766:
0767: int type = getNodeType(nodeHandle);
0768: Node node = getNode(nodeHandle);
0769: // %TBD% If an element only has one text node, we should just use it
0770: // directly.
0771: if (DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
0772: || DTM.DOCUMENT_FRAGMENT_NODE == type) {
0773: FastStringBuffer buf = StringBufferPool.get();
0774: String s;
0775:
0776: try {
0777: getNodeData(node, buf);
0778:
0779: s = (buf.length() > 0) ? buf.toString() : "";
0780: } finally {
0781: StringBufferPool.free(buf);
0782: }
0783:
0784: return m_xstrf.newstr(s);
0785: } else if (TEXT_NODE == type || CDATA_SECTION_NODE == type) {
0786: // If this is a DTM text node, it may be made of multiple DOM text
0787: // nodes -- including navigating into Entity References. DOM2DTM
0788: // records the first node in the sequence and requires that we
0789: // pick up the others when we retrieve the DTM node's value.
0790: //
0791: // %REVIEW% DOM Level 3 is expected to add a "whole text"
0792: // retrieval method which performs this function for us.
0793: FastStringBuffer buf = StringBufferPool.get();
0794: while (node != null) {
0795: buf.append(node.getNodeValue());
0796: node = logicalNextDOMTextNode(node);
0797: }
0798: String s = (buf.length() > 0) ? buf.toString() : "";
0799: StringBufferPool.free(buf);
0800: return m_xstrf.newstr(s);
0801: } else
0802: return m_xstrf.newstr(node.getNodeValue());
0803: }
0804:
0805: /**
0806: * Determine if the string-value of a node is whitespace
0807: *
0808: * @param nodeHandle The node Handle.
0809: *
0810: * @return Return true if the given node is whitespace.
0811: */
0812: public boolean isWhitespace(int nodeHandle) {
0813: int type = getNodeType(nodeHandle);
0814: Node node = getNode(nodeHandle);
0815: if (TEXT_NODE == type || CDATA_SECTION_NODE == type) {
0816: // If this is a DTM text node, it may be made of multiple DOM text
0817: // nodes -- including navigating into Entity References. DOM2DTM
0818: // records the first node in the sequence and requires that we
0819: // pick up the others when we retrieve the DTM node's value.
0820: //
0821: // %REVIEW% DOM Level 3 is expected to add a "whole text"
0822: // retrieval method which performs this function for us.
0823: FastStringBuffer buf = StringBufferPool.get();
0824: while (node != null) {
0825: buf.append(node.getNodeValue());
0826: node = logicalNextDOMTextNode(node);
0827: }
0828: boolean b = buf.isWhitespace(0, buf.length());
0829: StringBufferPool.free(buf);
0830: return b;
0831: }
0832: return false;
0833: }
0834:
0835: /**
0836: * Retrieve the text content of a DOM subtree, appending it into a
0837: * user-supplied FastStringBuffer object. Note that attributes are
0838: * not considered part of the content of an element.
0839: * <p>
0840: * There are open questions regarding whitespace stripping.
0841: * Currently we make no special effort in that regard, since the standard
0842: * DOM doesn't yet provide DTD-based information to distinguish
0843: * whitespace-in-element-context from genuine #PCDATA. Note that we
0844: * should probably also consider xml:space if/when we address this.
0845: * DOM Level 3 may solve the problem for us.
0846: * <p>
0847: * %REVIEW% Actually, since this method operates on the DOM side of the
0848: * fence rather than the DTM side, it SHOULDN'T do
0849: * any special handling. The DOM does what the DOM does; if you want
0850: * DTM-level abstractions, use DTM-level methods.
0851: *
0852: * @param node Node whose subtree is to be walked, gathering the
0853: * contents of all Text or CDATASection nodes.
0854: * @param buf FastStringBuffer into which the contents of the text
0855: * nodes are to be concatenated.
0856: */
0857: protected static void getNodeData(Node node, FastStringBuffer buf) {
0858:
0859: switch (node.getNodeType()) {
0860: case Node.DOCUMENT_FRAGMENT_NODE:
0861: case Node.DOCUMENT_NODE:
0862: case Node.ELEMENT_NODE: {
0863: for (Node child = node.getFirstChild(); null != child; child = child
0864: .getNextSibling()) {
0865: getNodeData(child, buf);
0866: }
0867: }
0868: break;
0869: case Node.TEXT_NODE:
0870: case Node.CDATA_SECTION_NODE:
0871: case Node.ATTRIBUTE_NODE: // Never a child but might be our starting node
0872: buf.append(node.getNodeValue());
0873: break;
0874: case Node.PROCESSING_INSTRUCTION_NODE:
0875: // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
0876: break;
0877: default:
0878: // ignore
0879: break;
0880: }
0881: }
0882:
0883: /**
0884: * Given a node handle, return its DOM-style node name. This will
0885: * include names such as #text or #document.
0886: *
0887: * @param nodeHandle the id of the node.
0888: * @return String Name of this node, which may be an empty string.
0889: * %REVIEW% Document when empty string is possible...
0890: * %REVIEW-COMMENT% It should never be empty, should it?
0891: */
0892: public String getNodeName(int nodeHandle) {
0893:
0894: Node node = getNode(nodeHandle);
0895:
0896: // Assume non-null.
0897: return node.getNodeName();
0898: }
0899:
0900: /**
0901: * Given a node handle, return the XPath node name. This should be
0902: * the name as described by the XPath data model, NOT the DOM-style
0903: * name.
0904: *
0905: * @param nodeHandle the id of the node.
0906: * @return String Name of this node, which may be an empty string.
0907: */
0908: public String getNodeNameX(int nodeHandle) {
0909:
0910: String name;
0911: short type = getNodeType(nodeHandle);
0912:
0913: switch (type) {
0914: case DTM.NAMESPACE_NODE: {
0915: Node node = getNode(nodeHandle);
0916:
0917: // assume not null.
0918: name = node.getNodeName();
0919: if (name.startsWith("xmlns:")) {
0920: name = QName.getLocalPart(name);
0921: } else if (name.equals("xmlns")) {
0922: name = "";
0923: }
0924: }
0925: break;
0926: case DTM.ATTRIBUTE_NODE:
0927: case DTM.ELEMENT_NODE:
0928: case DTM.ENTITY_REFERENCE_NODE:
0929: case DTM.PROCESSING_INSTRUCTION_NODE: {
0930: Node node = getNode(nodeHandle);
0931:
0932: // assume not null.
0933: name = node.getNodeName();
0934: }
0935: break;
0936: default:
0937: name = "";
0938: }
0939:
0940: return name;
0941: }
0942:
0943: /**
0944: * Given a node handle, return its XPath-style localname.
0945: * (As defined in Namespaces, this is the portion of the name after any
0946: * colon character).
0947: *
0948: * @param nodeHandle the id of the node.
0949: * @return String Local name of this node.
0950: */
0951: public String getLocalName(int nodeHandle) {
0952: if (JJK_NEWCODE) {
0953: int id = makeNodeIdentity(nodeHandle);
0954: if (NULL == id)
0955: return null;
0956: Node newnode = (Node) m_nodes.elementAt(id);
0957: String newname = newnode.getLocalName();
0958: if (null == newname) {
0959: // XSLT treats PIs, and possibly other things, as having QNames.
0960: String qname = newnode.getNodeName();
0961: if ('#' == qname.charAt(0)) {
0962: // Match old default for this function
0963: // This conversion may or may not be necessary
0964: newname = "";
0965: } else {
0966: int index = qname.indexOf(':');
0967: newname = (index < 0) ? qname : qname
0968: .substring(index + 1);
0969: }
0970: }
0971: return newname;
0972: } else {
0973: String name;
0974: short type = getNodeType(nodeHandle);
0975: switch (type) {
0976: case DTM.ATTRIBUTE_NODE:
0977: case DTM.ELEMENT_NODE:
0978: case DTM.ENTITY_REFERENCE_NODE:
0979: case DTM.NAMESPACE_NODE:
0980: case DTM.PROCESSING_INSTRUCTION_NODE: {
0981: Node node = getNode(nodeHandle);
0982:
0983: // assume not null.
0984: name = node.getLocalName();
0985:
0986: if (null == name) {
0987: String qname = node.getNodeName();
0988: int index = qname.indexOf(':');
0989:
0990: name = (index < 0) ? qname : qname
0991: .substring(index + 1);
0992: }
0993: }
0994: break;
0995: default:
0996: name = "";
0997: }
0998: return name;
0999: }
1000: }
1001:
1002: /**
1003: * Given a namespace handle, return the prefix that the namespace decl is
1004: * mapping.
1005: * Given a node handle, return the prefix used to map to the namespace.
1006: *
1007: * <p> %REVIEW% Are you sure you want "" for no prefix? </p>
1008: * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p>
1009: *
1010: * @param nodeHandle the id of the node.
1011: * @return String prefix of this node's name, or "" if no explicit
1012: * namespace prefix was given.
1013: */
1014: public String getPrefix(int nodeHandle) {
1015:
1016: String prefix;
1017: short type = getNodeType(nodeHandle);
1018:
1019: switch (type) {
1020: case DTM.NAMESPACE_NODE: {
1021: Node node = getNode(nodeHandle);
1022:
1023: // assume not null.
1024: String qname = node.getNodeName();
1025: int index = qname.indexOf(':');
1026:
1027: prefix = (index < 0) ? "" : qname.substring(index + 1);
1028: }
1029: break;
1030: case DTM.ATTRIBUTE_NODE:
1031: case DTM.ELEMENT_NODE: {
1032: Node node = getNode(nodeHandle);
1033:
1034: // assume not null.
1035: String qname = node.getNodeName();
1036: int index = qname.indexOf(':');
1037:
1038: prefix = (index < 0) ? "" : qname.substring(0, index);
1039: }
1040: break;
1041: default:
1042: prefix = "";
1043: }
1044:
1045: return prefix;
1046: }
1047:
1048: /**
1049: * Given a node handle, return its DOM-style namespace URI
1050: * (As defined in Namespaces, this is the declared URI which this node's
1051: * prefix -- or default in lieu thereof -- was mapped to.)
1052: *
1053: * <p>%REVIEW% Null or ""? -sb</p>
1054: *
1055: * @param nodeHandle the id of the node.
1056: * @return String URI value of this node's namespace, or null if no
1057: * namespace was resolved.
1058: */
1059: public String getNamespaceURI(int nodeHandle) {
1060: if (JJK_NEWCODE) {
1061: int id = makeNodeIdentity(nodeHandle);
1062: if (id == NULL)
1063: return null;
1064: Node node = (Node) m_nodes.elementAt(id);
1065: return node.getNamespaceURI();
1066: } else {
1067: String nsuri;
1068: short type = getNodeType(nodeHandle);
1069:
1070: switch (type) {
1071: case DTM.ATTRIBUTE_NODE:
1072: case DTM.ELEMENT_NODE:
1073: case DTM.ENTITY_REFERENCE_NODE:
1074: case DTM.NAMESPACE_NODE:
1075: case DTM.PROCESSING_INSTRUCTION_NODE: {
1076: Node node = getNode(nodeHandle);
1077:
1078: // assume not null.
1079: nsuri = node.getNamespaceURI();
1080:
1081: // %TBD% Handle DOM1?
1082: }
1083: break;
1084: default:
1085: nsuri = null;
1086: }
1087:
1088: return nsuri;
1089: }
1090:
1091: }
1092:
1093: /** Utility function: Given a DOM Text node, determine whether it is
1094: * logically followed by another Text or CDATASection node. This may
1095: * involve traversing into Entity References.
1096: *
1097: * %REVIEW% DOM Level 3 is expected to add functionality which may
1098: * allow us to retire this.
1099: */
1100: private Node logicalNextDOMTextNode(Node n) {
1101: Node p = n.getNextSibling();
1102: if (p == null) {
1103: // Walk out of any EntityReferenceNodes that ended with text
1104: for (n = n.getParentNode(); n != null
1105: && ENTITY_REFERENCE_NODE == n.getNodeType(); n = n
1106: .getParentNode()) {
1107: p = n.getNextSibling();
1108: if (p != null)
1109: break;
1110: }
1111: }
1112: n = p;
1113: while (n != null && ENTITY_REFERENCE_NODE == n.getNodeType()) {
1114: // Walk into any EntityReferenceNodes that start with text
1115: if (n.hasChildNodes())
1116: n = n.getFirstChild();
1117: else
1118: n = n.getNextSibling();
1119: }
1120: if (n != null) {
1121: // Found a logical next sibling. Is it text?
1122: int ntype = n.getNodeType();
1123: if (TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
1124: n = null;
1125: }
1126: return n;
1127: }
1128:
1129: /**
1130: * Given a node handle, return its node value. This is mostly
1131: * as defined by the DOM, but may ignore some conveniences.
1132: * <p>
1133: *
1134: * @param nodeHandle The node id.
1135: * @return String Value of this node, or null if not
1136: * meaningful for this node type.
1137: */
1138: public String getNodeValue(int nodeHandle) {
1139: // The _type(nodeHandle) call was taking the lion's share of our
1140: // time, and was wrong anyway since it wasn't coverting handle to
1141: // identity. Inlined it.
1142: int type = _exptype(makeNodeIdentity(nodeHandle));
1143: type = (NULL != type) ? getNodeType(nodeHandle) : NULL;
1144:
1145: if (TEXT_NODE != type && CDATA_SECTION_NODE != type)
1146: return getNode(nodeHandle).getNodeValue();
1147:
1148: // If this is a DTM text node, it may be made of multiple DOM text
1149: // nodes -- including navigating into Entity References. DOM2DTM
1150: // records the first node in the sequence and requires that we
1151: // pick up the others when we retrieve the DTM node's value.
1152: //
1153: // %REVIEW% DOM Level 3 is expected to add a "whole text"
1154: // retrieval method which performs this function for us.
1155: Node node = getNode(nodeHandle);
1156: Node n = logicalNextDOMTextNode(node);
1157: if (n == null)
1158: return node.getNodeValue();
1159:
1160: FastStringBuffer buf = StringBufferPool.get();
1161: buf.append(node.getNodeValue());
1162: while (n != null) {
1163: buf.append(n.getNodeValue());
1164: n = logicalNextDOMTextNode(n);
1165: }
1166: String s = (buf.length() > 0) ? buf.toString() : "";
1167: StringBufferPool.free(buf);
1168: return s;
1169: }
1170:
1171: /**
1172: * A document type declaration information item has the following properties:
1173: *
1174: * 1. [system identifier] The system identifier of the external subset, if
1175: * it exists. Otherwise this property has no value.
1176: *
1177: * @return the system identifier String object, or null if there is none.
1178: */
1179: public String getDocumentTypeDeclarationSystemIdentifier() {
1180:
1181: Document doc;
1182:
1183: if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1184: doc = (Document) m_root;
1185: else
1186: doc = m_root.getOwnerDocument();
1187:
1188: if (null != doc) {
1189: DocumentType dtd = doc.getDoctype();
1190:
1191: if (null != dtd) {
1192: return dtd.getSystemId();
1193: }
1194: }
1195:
1196: return null;
1197: }
1198:
1199: /**
1200: * Return the public identifier of the external subset,
1201: * normalized as described in 4.2.2 External Entities [XML]. If there is
1202: * no external subset or if it has no public identifier, this property
1203: * has no value.
1204: *
1205: * @return the public identifier String object, or null if there is none.
1206: */
1207: public String getDocumentTypeDeclarationPublicIdentifier() {
1208:
1209: Document doc;
1210:
1211: if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1212: doc = (Document) m_root;
1213: else
1214: doc = m_root.getOwnerDocument();
1215:
1216: if (null != doc) {
1217: DocumentType dtd = doc.getDoctype();
1218:
1219: if (null != dtd) {
1220: return dtd.getPublicId();
1221: }
1222: }
1223:
1224: return null;
1225: }
1226:
1227: /**
1228: * Returns the <code>Element</code> whose <code>ID</code> is given by
1229: * <code>elementId</code>. If no such element exists, returns
1230: * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1231: * has this <code>ID</code>. Attributes (including those
1232: * with the name "ID") are not of type ID unless so defined by DTD/Schema
1233: * information available to the DTM implementation.
1234: * Implementations that do not know whether attributes are of type ID or
1235: * not are expected to return <code>DTM.NULL</code>.
1236: *
1237: * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1238: * and this operation searches only within a single document, right?
1239: * Wouldn't want collisions between DTMs in the same process.</p>
1240: *
1241: * @param elementId The unique <code>id</code> value for an element.
1242: * @return The handle of the matching element.
1243: */
1244: public int getElementById(String elementId) {
1245:
1246: Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) ? (Document) m_root
1247: : m_root.getOwnerDocument();
1248:
1249: if (null != doc) {
1250: Node elem = doc.getElementById(elementId);
1251: if (null != elem) {
1252: int elemHandle = getHandleFromNode(elem);
1253:
1254: if (DTM.NULL == elemHandle) {
1255: int identity = m_nodes.size() - 1;
1256: while (DTM.NULL != (identity = getNextNodeIdentity(identity))) {
1257: Node node = getNode(identity);
1258: if (node == elem) {
1259: elemHandle = getHandleFromNode(elem);
1260: break;
1261: }
1262: }
1263: }
1264:
1265: return elemHandle;
1266: }
1267:
1268: }
1269: return DTM.NULL;
1270: }
1271:
1272: /**
1273: * The getUnparsedEntityURI function returns the URI of the unparsed
1274: * entity with the specified name in the same document as the context
1275: * node (see [3.3 Unparsed Entities]). It returns the empty string if
1276: * there is no such entity.
1277: * <p>
1278: * XML processors may choose to use the System Identifier (if one
1279: * is provided) to resolve the entity, rather than the URI in the
1280: * Public Identifier. The details are dependent on the processor, and
1281: * we would have to support some form of plug-in resolver to handle
1282: * this properly. Currently, we simply return the System Identifier if
1283: * present, and hope that it is a usable URI or that our caller can
1284: * map it to one.
1285: * TODO: Resolve Public Identifiers... or consider changing function name.
1286: * <p>
1287: * If we find a relative URI
1288: * reference, XML expects it to be resolved in terms of the base URI
1289: * of the document. The DOM doesn't do that for us, and it isn't
1290: * entirely clear whether that should be done here; currently that's
1291: * pushed up to a higher level of our application. (Note that DOM Level
1292: * 1 didn't store the document's base URI.)
1293: * TODO: Consider resolving Relative URIs.
1294: * <p>
1295: * (The DOM's statement that "An XML processor may choose to
1296: * completely expand entities before the structure model is passed
1297: * to the DOM" refers only to parsed entities, not unparsed, and hence
1298: * doesn't affect this function.)
1299: *
1300: * @param name A string containing the Entity Name of the unparsed
1301: * entity.
1302: *
1303: * @return String containing the URI of the Unparsed Entity, or an
1304: * empty string if no such entity exists.
1305: */
1306: public String getUnparsedEntityURI(String name) {
1307:
1308: String url = "";
1309: Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) ? (Document) m_root
1310: : m_root.getOwnerDocument();
1311:
1312: if (null != doc) {
1313: DocumentType doctype = doc.getDoctype();
1314:
1315: if (null != doctype) {
1316: NamedNodeMap entities = doctype.getEntities();
1317: if (null == entities)
1318: return url;
1319: Entity entity = (Entity) entities.getNamedItem(name);
1320: if (null == entity)
1321: return url;
1322:
1323: String notationName = entity.getNotationName();
1324:
1325: if (null != notationName) // then it's unparsed
1326: {
1327: // The draft says: "The XSLT processor may use the public
1328: // identifier to generate a URI for the entity instead of the URI
1329: // specified in the system identifier. If the XSLT processor does
1330: // not use the public identifier to generate the URI, it must use
1331: // the system identifier; if the system identifier is a relative
1332: // URI, it must be resolved into an absolute URI using the URI of
1333: // the resource containing the entity declaration as the base
1334: // URI [RFC2396]."
1335: // So I'm falling a bit short here.
1336: url = entity.getSystemId();
1337:
1338: if (null == url) {
1339: url = entity.getPublicId();
1340: } else {
1341: // This should be resolved to an absolute URL, but that's hard
1342: // to do from here.
1343: }
1344: }
1345: }
1346: }
1347:
1348: return url;
1349: }
1350:
1351: /**
1352: * 5. [specified] A flag indicating whether this attribute was actually
1353: * specified in the start-tag of its element, or was defaulted from the
1354: * DTD.
1355: *
1356: * @param attributeHandle the attribute handle
1357: * @return <code>true</code> if the attribute was specified;
1358: * <code>false</code> if it was defaulted.
1359: */
1360: public boolean isAttributeSpecified(int attributeHandle) {
1361: int type = getNodeType(attributeHandle);
1362:
1363: if (DTM.ATTRIBUTE_NODE == type) {
1364: Attr attr = (Attr) getNode(attributeHandle);
1365: return attr.getSpecified();
1366: }
1367: return false;
1368: }
1369:
1370: /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
1371: * we're wrapped around an existing DOM.
1372: *
1373: * @param source The IncrementalSAXSource that we want to receive events from
1374: * on demand.
1375: */
1376: public void setIncrementalSAXSource(IncrementalSAXSource source) {
// Intentionally empty: the supplied source is ignored.
1377: }
1378:
1379: /** getContentHandler returns "our SAX builder" -- the thing that
1380: * someone else should send SAX events to in order to extend this
1381: * DTM model.
1382: *
1383: * @return null if this model doesn't respond to SAX events,
1384: * "this" if the DTM object has a built-in SAX ContentHandler,
1385: * the IncrementalSAXSource if we're bound to one and should receive
1386: * the SAX stream via it for incremental build purposes...
1387: * */
1388: public org.xml.sax.ContentHandler getContentHandler() {
// Always null: this implementation offers no SAX ContentHandler to callers.
1389: return null;
1390: }
1391:
1392: /**
1393: * Return this DTM's lexical handler.
1394: *
1395: * %REVIEW% Should this return null if construction already done/begun?
1396: *
1397: * @return null if this model doesn't respond to lexical SAX events,
1398: * "this" if the DTM object has a built-in SAX ContentHandler,
1399: * the IncrementalSAXSource if we're bound to one and should receive
1400: * the SAX stream via it for incremental build purposes...
1401: */
1402: public org.xml.sax.ext.LexicalHandler getLexicalHandler() {
1403:
// Always null: this implementation offers no SAX LexicalHandler to callers.
1404: return null;
1405: }
1406:
1407: /**
1408: * Return this DTM's EntityResolver.
1409: *
1410: * @return null if this model doesn't respond to SAX entity ref events.
1411: */
1412: public org.xml.sax.EntityResolver getEntityResolver() {
1413:
// Always null: this implementation offers no SAX EntityResolver to callers.
1414: return null;
1415: }
1416:
1417: /**
1418: * Return this DTM's DTDHandler.
1419: *
1420: * @return null if this model doesn't respond to SAX dtd events.
1421: */
1422: public org.xml.sax.DTDHandler getDTDHandler() {
1423:
// Always null: this implementation offers no SAX DTDHandler to callers.
1424: return null;
1425: }
1426:
1427: /**
1428: * Return this DTM's ErrorHandler.
1429: *
1430: * @return null if this model doesn't respond to SAX error events.
1431: */
1432: public org.xml.sax.ErrorHandler getErrorHandler() {
1433:
// Always null: this implementation offers no SAX ErrorHandler to callers.
1434: return null;
1435: }
1436:
1437: /**
1438: * Return this DTM's DeclHandler.
1439: *
1440: * @return null if this model doesn't respond to SAX Decl events.
1441: */
1442: public org.xml.sax.ext.DeclHandler getDeclHandler() {
1443:
// Always null: this implementation offers no SAX DeclHandler to callers.
1444: return null;
1445: }
1446:
1447: /** @return true iff we're building this model incrementally (eg
1448: * we're partnered with a IncrementalSAXSource) and thus require that the
1449: * transformation and the parse run simultaneously. Guidance to the
1450: * DTMManager.
1451: * */
1452: public boolean needsTwoThreads() {
// Constant answer: no state is consulted, the result is false for every call.
1453: return false;
1454: }
1455:
1456: // ========== Direct SAX Dispatch, for optimization purposes ========
1457:
1458: /**
1459: * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1460: * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1461: * the definition of <CODE>S</CODE></A> for details.
1462: * @param ch Character to check as XML whitespace.
1463: * @return =true if <var>ch</var> is XML whitespace; otherwise =false.
1464: */
1465: private static boolean isSpace(char ch) {
// The whitespace test is not implemented here; it is delegated to XMLCharacterRecognizer.
1466: return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now.
1467: }
1468:
1469: /**
1470: * Directly call the
1471: * characters method on the passed ContentHandler for the
1472: * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1473: * for the definition of a node's string-value). Multiple calls to the
1474: * ContentHandler's characters methods may well occur for a single call to
1475: * this method.
1476: *
1477: * @param nodeHandle The node ID.
1478: * @param ch A non-null reference to a ContentHandler.
1479: *
1480: * @throws org.xml.sax.SAXException
1481: */
1482: public void dispatchCharactersEvents(int nodeHandle,
1483: org.xml.sax.ContentHandler ch, boolean normalize)
1484: throws org.xml.sax.SAXException {
1485: if (normalize) {
1486: XMLString str = getStringValue(nodeHandle);
1487: str = str.fixWhiteSpace(true, true, false);
1488: str.dispatchCharactersEvents(ch);
1489: } else {
1490: int type = getNodeType(nodeHandle);
1491: Node node = getNode(nodeHandle);
1492: dispatchNodeData(node, ch, 0);
1493: // Text coalition -- a DTM text node may represent multiple
1494: // DOM nodes.
1495: if (TEXT_NODE == type || CDATA_SECTION_NODE == type) {
1496: while (null != (node = logicalNextDOMTextNode(node))) {
1497: dispatchNodeData(node, ch, 0);
1498: }
1499: }
1500: }
1501: }
1502:
1503: /**
1504: * Retrieve the text content of a DOM subtree, dispatching it to a
1505: * user-supplied ContentHandler object. Note that attributes are
1506: * not considered part of the content of an element.
1507: * <p>
1508: * There are open questions regarding whitespace stripping.
1509: * Currently we make no special effort in that regard, since the standard
1510: * DOM doesn't yet provide DTD-based information to distinguish
1511: * whitespace-in-element-context from genuine #PCDATA. Note that we
1512: * should probably also consider xml:space if/when we address this.
1513: * DOM Level 3 may solve the problem for us.
1514: * <p>
1515: * %REVIEW% Note that as a DOM-level operation, it can be argued that this
1516: * routine _shouldn't_ perform any processing beyond what the DOM already
1517: * does, and that whitespace stripping and so on belong at the DTM level.
1518: * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1519: *
1520: * @param node Node whose subtree is to be walked, gathering the
1521: * contents of all Text or CDATASection nodes.
1522: */
1523: protected static void dispatchNodeData(Node node,
1524: org.xml.sax.ContentHandler ch, int depth)
1525: throws org.xml.sax.SAXException {
1526:
1527: switch (node.getNodeType()) {
1528: case Node.DOCUMENT_FRAGMENT_NODE:
1529: case Node.DOCUMENT_NODE:
1530: case Node.ELEMENT_NODE: {
1531: for (Node child = node.getFirstChild(); null != child; child = child
1532: .getNextSibling()) {
1533: dispatchNodeData(child, ch, depth + 1);
1534: }
1535: }
1536: break;
1537: case Node.PROCESSING_INSTRUCTION_NODE: // %REVIEW%
1538: case Node.COMMENT_NODE:
1539: if (0 != depth)
1540: break;
1541: // NOTE: Because this operation works in the DOM space, it does _not_ attempt
1542: // to perform Text Coalition. That should only be done in DTM space.
1543: case Node.TEXT_NODE:
1544: case Node.CDATA_SECTION_NODE:
1545: case Node.ATTRIBUTE_NODE:
1546: String str = node.getNodeValue();
1547: if (ch instanceof CharacterNodeHandler) {
1548: ((CharacterNodeHandler) ch).characters(node);
1549: } else {
1550: ch.characters(str.toCharArray(), 0, str.length());
1551: }
1552: break;
1553: // /* case Node.PROCESSING_INSTRUCTION_NODE :
1554: // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1555: // break; */
1556: default:
1557: // ignore
1558: break;
1559: }
1560: }
1561:
1562: TreeWalker m_walker = new TreeWalker(null); // Walker reused by dispatchToEvents; built with a null ContentHandler, which each call sets and then clears again.
1563:
1564: /**
1565: * Directly create SAX parser events from a subtree.
1566: *
1567: * @param nodeHandle The node ID.
1568: * @param ch A non-null reference to a ContentHandler.
1569: *
1570: * @throws org.xml.sax.SAXException
1571: */
1572: public void dispatchToEvents(int nodeHandle,
1573: org.xml.sax.ContentHandler ch)
1574: throws org.xml.sax.SAXException {
1575: TreeWalker treeWalker = m_walker;
1576: ContentHandler prevCH = treeWalker.getContentHandler();
1577:
1578: if (null != prevCH) {
1579: treeWalker = new TreeWalker(null);
1580: }
1581: treeWalker.setContentHandler(ch);
1582:
1583: try {
1584: Node node = getNode(nodeHandle);
1585: treeWalker.traverse(node);
1586: } finally {
1587: treeWalker.setContentHandler(null);
1588: }
1589: }
1590:
1591: public interface CharacterNodeHandler {
// Optional extension for a ContentHandler: when the handler passed to
// dispatchNodeData also implements this interface, it is handed the DOM node
// itself through this method instead of a copy of the node's characters.
1592: public void characters(Node node)
1593: throws org.xml.sax.SAXException;
1594: }
1595:
1596: /**
1597: * For the moment all the run time properties are ignored by this
1598: * class.
1599: *
1600: * @param property a <code>String</code> value
1601: * @param value an <code>Object</code> value
1602: */
1603: public void setProperty(String property, Object value) {
// Intentionally empty: both arguments are ignored.
1604: }
1605:
1606: /**
1607: * No source information is available for DOM2DTM, so return
1608: * <code>null</code> here.
1609: *
1610: * @param node an <code>int</code> value
1611: * @return null
1612: */
1613: public SourceLocator getSourceLocatorFor(int node) {
// Always null, whatever node is passed: the argument is not inspected.
1614: return null;
1615: }
1616:
1617: }
|