001: package com.meterware.httpunit;
002:
003: /********************************************************************************************************************
004: * $Id: NodeUtils.java,v 1.21 2004/08/08 17:45:30 russgold Exp $
005: *
006: * Copyright (c) 2000-2004, Russell Gold
007: *
008: * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
009: * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
010: * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
011: * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
012: *
013: * The above copyright notice and this permission notice shall be included in all copies or substantial portions
014: * of the Software.
015: *
016: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
017: * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
018: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
019: * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020: * DEALINGS IN THE SOFTWARE.
021: *
022: *******************************************************************************************************************/
023: import org.w3c.dom.Element;
024: import org.w3c.dom.NamedNodeMap;
025: import org.w3c.dom.Node;
026: import org.w3c.dom.NodeList;
027:
028: import java.util.Stack;
029: import java.util.Iterator;
030: import java.util.ListIterator;
031:
032: import com.meterware.httpunit.parsing.HTMLParserFactory;
033:
034: /**
035: * Some common utilities for manipulating DOM nodes.
036: **/
037: class NodeUtils {
038:
039: public static int getAttributeValue(Node node,
040: String attributeName, int defaultValue) {
041: NamedNodeMap nnm = node.getAttributes();
042: Node attribute = nnm.getNamedItem(attributeName);
043: if (attribute == null) {
044: return defaultValue;
045: } else
046: try {
047: return Integer.parseInt(attribute.getNodeValue());
048: } catch (NumberFormatException e) {
049: return defaultValue;
050: }
051: }
052:
053: public static String getNodeAttribute(Node node,
054: String attributeName) {
055: return getNodeAttribute(node, attributeName, "");
056: }
057:
058: public static String getNodeAttribute(Node node,
059: String attributeName, String defaultValue) {
060: NamedNodeMap attributes = node.getAttributes();
061: if (attributes == null)
062: return defaultValue;
063:
064: Node attribute = attributes.getNamedItem(attributeName);
065: return (attribute == null) ? defaultValue : attribute
066: .getNodeValue();
067: }
068:
069: static boolean isNodeAttributePresent(Node node,
070: final String attributeName) {
071: return node.getAttributes().getNamedItem(attributeName) != null;
072: }
073:
074: interface NodeAction {
075: /**
076: * Does appropriate processing on specified element. Will return false if the subtree below the element
077: * should be skipped.
078: */
079: public boolean processElement(PreOrderTraversal traversal,
080: Element element);
081:
082: /**
083: * Processes a text node.
084: */
085: public void processTextNode(PreOrderTraversal traversal,
086: Node textNode);
087: }
088:
089: /**
090: * Converts the DOM trees rooted at the specified nodes to text, ignoring
091: * any HTML tags.
092: **/
093: public static String asText(NodeList rootNodes) {
094: final StringBuffer sb = new StringBuffer(
095: HttpUnitUtils.DEFAULT_TEXT_BUFFER_SIZE);
096: NodeAction action = new NodeAction() {
097: public boolean processElement(PreOrderTraversal traversal,
098: Element node) {
099: String nodeName = node.getNodeName().toLowerCase();
100: if (nodeName.equals("p") || nodeName.equals("br")
101: || nodeName.equalsIgnoreCase("tr")) {
102: sb.append("\n");
103: } else if (nodeName.equals("td")
104: || nodeName.equalsIgnoreCase("th")) {
105: sb.append(" | ");
106: } else if (nodeName.equals("img")
107: && HttpUnitOptions.getImagesTreatedAsAltText()) {
108: sb.append(getNodeAttribute(node, "alt"));
109: }
110: return true;
111: }
112:
113: public void processTextNode(PreOrderTraversal traversal,
114: Node textNode) {
115: sb.append(HTMLParserFactory.getHTMLParser()
116: .getCleanedText(textNode.getNodeValue()));
117: }
118: };
119: new PreOrderTraversal(rootNodes).perform(action);
120: return sb.toString();
121: }
122:
123: static class PreOrderTraversal {
124:
125: private Stack _pendingNodes = new Stack();
126: private Stack _traversalContext = new Stack();
127: private static final Object POP_CONTEXT = new Object();
128:
129: public PreOrderTraversal(NodeList rootNodes) {
130: pushNodeList(rootNodes);
131: }
132:
133: public PreOrderTraversal(Node rootNode) {
134: pushNodeList(rootNode.getLastChild());
135: }
136:
137: public void pushBaseContext(Object context) {
138: _traversalContext.push(context);
139: }
140:
141: public void pushContext(Object context) {
142: _traversalContext.push(context);
143: _pendingNodes.push(POP_CONTEXT);
144: }
145:
146: public Iterator getContexts() {
147: Stack stack = _traversalContext;
148: return getTopDownIterator(stack);
149: }
150:
151: public Object getRootContext() {
152: return _traversalContext.firstElement();
153: }
154:
155: private Iterator getTopDownIterator(final Stack stack) {
156: return new Iterator() {
157: private ListIterator _forwardIterator = stack
158: .listIterator(stack.size());
159:
160: public boolean hasNext() {
161: return _forwardIterator.hasPrevious();
162: }
163:
164: public Object next() {
165: return _forwardIterator.previous();
166: }
167:
168: public void remove() {
169: _forwardIterator.remove();
170: }
171: };
172: }
173:
174: /**
175: * Returns the most recently pushed context which implements the specified class.
176: * Will return null if no matching context is found.
177: */
178: public Object getClosestContext(Class matchingClass) {
179: for (int i = _traversalContext.size() - 1; i >= 0; i--) {
180: Object o = _traversalContext.elementAt(i);
181: if (matchingClass.isInstance(o))
182: return o;
183: }
184: return null;
185: }
186:
187: public void perform(NodeAction action) {
188: while (!_pendingNodes.empty()) {
189: final Object object = _pendingNodes.pop();
190: if (object == POP_CONTEXT) {
191: _traversalContext.pop();
192: } else {
193: Node node = (Node) object;
194: if (node.getNodeType() == Node.TEXT_NODE) {
195: action.processTextNode(this , node);
196: } else if (node.getNodeType() != Node.ELEMENT_NODE) {
197: continue;
198: } else
199: action.processElement(this , (Element) node);
200: pushNodeList(node.getLastChild());
201: }
202: }
203: }
204:
205: private void pushNodeList(NodeList nl) {
206: if (nl != null) {
207: for (int i = nl.getLength() - 1; i >= 0; i--) {
208: _pendingNodes.push(nl.item(i));
209: }
210: }
211: }
212:
213: private void pushNodeList(Node lastChild) {
214: for (Node node = lastChild; node != null; node = node
215: .getPreviousSibling()) {
216: _pendingNodes.push(node);
217: }
218: }
219: }
220:
221: }
|