001: /*
002: * Copyright 2001-2007 Geert Bevin <gbevin[remove] at uwyn dot com>
003: * Distributed under the terms of either:
004: * - the common development and distribution license (CDDL), v1.0; or
005: * - the GNU Lesser General Public License, v2.1 or later
006: * $Id: ParsedHtml.java 3846 2007-07-11 11:33:53Z gbevin $
007: */
008: package com.uwyn.rife.test;
009:
010: import java.io.IOException;
011: import java.io.Reader;
012: import java.io.StringReader;
013: import java.util.ArrayList;
014: import java.util.List;
015: import org.apache.xerces.parsers.DOMParser;
016: import org.cyberneko.html.HTMLConfiguration;
017: import org.w3c.dom.Document;
018: import org.w3c.dom.NamedNodeMap;
019: import org.w3c.dom.Node;
020: import org.w3c.dom.NodeList;
021: import org.xml.sax.InputSource;
022: import org.xml.sax.SAXException;
023:
024: /**
025: * Retrieves the text content of a {@link MockResponse} and parses it as HTML.
026: *
027: * @author Geert Bevin (gbevin[remove] at uwyn dot com)
028: * @version $Revision: 3846 $
029: * @since 1.1
030: */
031: public class ParsedHtml {
032: private static final String DEFAULT_ENCODING = "http://cyberneko.org/html/properties/default-encoding";
033: private static final String TAG_NAME_CASE = "http://cyberneko.org/html/properties/names/elems";
034: private static final String ATTRIBUTE_NAME_CASE = "http://cyberneko.org/html/properties/names/attrs";
035:
036: private MockResponse mResponse;
037: private Document mDocument;
038: private List<MockForm> mForms = new ArrayList<MockForm>();
039: private List<MockLink> mLinks = new ArrayList<MockLink>();
040:
041: private ParsedHtml(MockResponse response, Document document) {
042: mResponse = response;
043: mDocument = document;
044:
045: // get all the forms
046: NodeList form_nodes = document.getElementsByTagName("form");
047: for (int i = 0; i < form_nodes.getLength(); i++) {
048: Node form_node = form_nodes.item(i);
049: MockForm form = new MockForm(mResponse, form_node);
050: mForms.add(form);
051: }
052:
053: // get all the links
054: NodeList link_nodes = document.getElementsByTagName("a");
055: for (int i = 0; i < link_nodes.getLength(); i++) {
056: Node link_node = link_nodes.item(i);
057: MockLink link = new MockLink(mResponse, link_node);
058: mLinks.add(link);
059: }
060: }
061:
062: /**
063: * Parses the text content of a {@link MockResponse} object as HTML and
064: * returns the result as an instance of <code>ParsedHtml</code>.
065: *
066: * @param response the response whose text content will be parsed
067: * @return the resulting instance of <code>ParsedHtml</code>
068: * @since 1.1
069: */
070: public static ParsedHtml parse(MockResponse response)
071: throws IOException, SAXException {
072: return parse(response, response.getText());
073: }
074:
075: static ParsedHtml parse(MockResponse response, String text)
076: throws IOException, SAXException {
077: Reader reader = new StringReader(text);
078: InputSource inputsource = new InputSource(reader);
079:
080: HTMLConfiguration config = new HTMLConfiguration();
081: config.setProperty(DEFAULT_ENCODING, "UTF-8");
082: config.setProperty(TAG_NAME_CASE, "lower");
083: config.setProperty(ATTRIBUTE_NAME_CASE, "lower");
084: DOMParser parser = new DOMParser(config);
085:
086: parser.parse(inputsource);
087:
088: Document document = parser.getDocument();
089:
090: return new ParsedHtml(response, document);
091: }
092:
093: /**
094: * Retrieves the DOM XML document that corresponds to the parsed HTML.
095: *
096: * @return the DOM XML document
097: * @since 1.1
098: */
099: public Document getDocument() {
100: return mDocument;
101: }
102:
103: /**
104: * Retrieves the text of the <code>title</code> tag.
105: *
106: * @return the title
107: * @since 1.1
108: */
109: public String getTitle() {
110: NodeList list = mDocument.getElementsByTagName("title");
111: if (0 == list.getLength()) {
112: return null;
113: }
114:
115: return list.item(0).getTextContent();
116: }
117:
118: /**
119: * Retrieves the list of all the forms in the HTML document.
120: *
121: * @return a list with {@link MockForm} instances
122: * @see #getFormWithName
123: * @see #getFormWithId
124: * @since 1.1
125: */
126: public List<MockForm> getForms() {
127: return mForms;
128: }
129:
130: /**
131: * Retrieves the first form in the HTML document with a particular
132: * <code>name</code> attribute.
133: *
134: * @param name the content of the <code>name</code> attribute
135: * @return the first {@link MockForm} whose <code>name</code> attribute
136: * matches; or
137: * <p><code>null</code> if no such form could be found
138: * @see #getForms
139: * @see #getFormWithId
140: * @since 1.1
141: */
142: public MockForm getFormWithName(String name) {
143: if (null == name)
144: throw new IllegalArgumentException("name can't be null");
145: if (0 == name.length())
146: throw new IllegalArgumentException("name can't be empty");
147:
148: for (MockForm form : mForms) {
149: if (name.equals(form.getName())) {
150: return form;
151: }
152: }
153: return null;
154: }
155:
156: /**
157: * Retrieves the first form in the HTML document with a particular
158: * <code>id</code> attribute.
159: *
160: * @param id the content of the <code>id</code> attribute
161: * @return the first {@link MockForm} whose <code>id</code> attribute
162: * matches; or
163: * <p><code>null</code> if no such form could be found
164: * @see #getForms
165: * @see #getFormWithName
166: * @since 1.1
167: */
168: public MockForm getFormWithId(String id) {
169: if (null == id)
170: throw new IllegalArgumentException("id can't be null");
171: if (0 == id.length())
172: throw new IllegalArgumentException("id can't be empty");
173:
174: for (MockForm form : mForms) {
175: if (form.getId().equals(id)) {
176: return form;
177: }
178: }
179: return null;
180: }
181:
182: /**
183: * Retrieves the list of all the links in the HTML document.
184: *
185: * @return a list with {@link MockLink} instances
186: * @see #getLinkWithName
187: * @see #getLinkWithId
188: * @see #getLinkWithText
189: * @see #getLinkWithImageAlt
190: * @see #getLinkWithImageName
191: * @since 1.1
192: */
193: public List<MockLink> getLinks() {
194: return mLinks;
195: }
196:
197: /**
198: * Retrieves the first link in the HTML document with a particular
199: * <code>name</code> attribute.
200: *
201: * @param name the content of the <code>name</code> attribute
202: * @return the first {@link MockLink} whose <code>name</code> attribute
203: * matches; or
204: * <p><code>null</code> if no such link could be found
205: * @see #getLinks
206: * @see #getLinkWithId
207: * @see #getLinkWithText
208: * @see #getLinkWithImageAlt
209: * @see #getLinkWithImageName
210: * @since 1.1
211: */
212: public MockLink getLinkWithName(String name) {
213: if (null == name)
214: throw new IllegalArgumentException("name can't be null");
215: if (0 == name.length())
216: throw new IllegalArgumentException("name can't be empty");
217:
218: for (MockLink link : mLinks) {
219: if (link.getName().equals(name)) {
220: return link;
221: }
222: }
223: return null;
224: }
225:
226: /**
227: * Retrieves the first link in the HTML document with a particular
228: * <code>id</code> attribute.
229: *
230: * @param id the content of the <code>id</code> attribute
231: * @return the first {@link MockLink} whose <code>id</code> attribute
232: * matches; or
233: * <p><code>null</code> if no such link could be found
234: * @see #getLinks
235: * @see #getLinkWithName
236: * @see #getLinkWithText
237: * @see #getLinkWithImageAlt
238: * @see #getLinkWithImageName
239: * @since 1.1
240: */
241: public MockLink getLinkWithId(String id) {
242: if (null == id)
243: throw new IllegalArgumentException("id can't be null");
244: if (0 == id.length())
245: throw new IllegalArgumentException("id can't be empty");
246:
247: for (MockLink link : mLinks) {
248: if (id.equals(link.getId())) {
249: return link;
250: }
251: }
252: return null;
253: }
254:
255: /**
256: * Retrieves the first link in the HTML document that surrounds a particular
257: * text.
258: *
259: * @param text the surrounded text
260: * @return the first {@link MockLink} whose surrounded text matches; or
261: * <p><code>null</code> if no such link could be found
262: * @see #getLinks
263: * @see #getLinkWithName
264: * @see #getLinkWithId
265: * @see #getLinkWithText
266: * @see #getLinkWithImageName
267: * @since 1.1
268: */
269: public MockLink getLinkWithText(String text) {
270: if (null == text)
271: throw new IllegalArgumentException("text can't be null");
272:
273: for (MockLink link : mLinks) {
274: if (link.getText() != null && link.getText().equals(text)) {
275: return link;
276: }
277: }
278: return null;
279: }
280:
281: /**
282: * Retrieves the first link in the HTML document that surrounds an
283: * <code>img</code> tag with a certain <code>alt</code> attribute.
284: *
285: * @param alt the content of the <code>alt</code> attribute
286: * @return the first {@link MockLink} that has an <code>img</code> tag
287: * whose <code>alt</code> attribute matches; or
288: * <p><code>null</code> if no such link could be found
289: * @see #getLinks
290: * @see #getLinkWithName
291: * @see #getLinkWithId
292: * @see #getLinkWithText
293: * @see #getLinkWithImageName
294: * @since 1.1
295: */
296: public MockLink getLinkWithImageAlt(String alt) {
297: if (null == alt)
298: throw new IllegalArgumentException("alt can't be null");
299:
300: for (MockLink link : mLinks) {
301: Node node = link.getNode();
302: NodeList child_nodes = node.getChildNodes();
303: if (child_nodes != null && child_nodes.getLength() > 0) {
304: for (int i = 0; i < child_nodes.getLength(); i++) {
305: Node child_node = child_nodes.item(i);
306: if ("img".equals(child_node.getNodeName())) {
307: String alt_text = getNodeAttribute(child_node,
308: "alt", null);
309: if (alt_text != null && alt_text.equals(alt)) {
310: return link;
311: }
312: }
313: }
314: }
315: }
316: return null;
317: }
318:
319: /**
320: * Retrieves the first link in the HTML document that surrounds an
321: * <code>img</code> tag with a certain <code>name</code> attribute.
322: *
323: * @param name the content of the <code>name</code> attribute
324: * @return the first {@link MockLink} that has an <code>img</code> tag
325: * whose <code>name</code> attribute matches; or
326: * <p><code>null</code> if no such link could be found
327: * @see #getLinks
328: * @see #getLinkWithName
329: * @see #getLinkWithId
330: * @see #getLinkWithText
331: * @see #getLinkWithImageAlt
332: * @since 1.1
333: */
334: public MockLink getLinkWithImageName(String name) {
335: if (null == name)
336: throw new IllegalArgumentException("name can't be null");
337: if (0 == name.length())
338: throw new IllegalArgumentException("name can't be empty");
339:
340: for (MockLink link : mLinks) {
341: Node node = link.getNode();
342: NodeList child_nodes = node.getChildNodes();
343: if (child_nodes != null && child_nodes.getLength() > 0) {
344: for (int i = 0; i < child_nodes.getLength(); i++) {
345: Node child_node = child_nodes.item(i);
346: if ("img".equals(child_node.getNodeName())) {
347: String alt_text = getNodeAttribute(child_node,
348: "name", null);
349: if (alt_text != null && alt_text.equals(name)) {
350: return link;
351: }
352: }
353: }
354: }
355: }
356: return null;
357: }
358:
359: /**
360: * Retrieves the value of the attribute of an XML DOM node.
361: *
362: * @param node the node where the attribute should be obtained from
363: * @param attributeName the name of the attribute
364: * @return the value of the attribute; or
365: * <p><code>null</code> if no attribute could be found
366: * @since 1.2
367: */
368: public static String getNodeAttribute(Node node,
369: String attributeName) {
370: return getNodeAttribute(node, attributeName, null);
371: }
372:
373: static String getNodeAttribute(Node node, String attributeName,
374: String defaultValue) {
375: NamedNodeMap attributes = node.getAttributes();
376: if (attributes == null)
377: return defaultValue;
378:
379: Node attribute = attributes.getNamedItem(attributeName);
380: return (attribute == null) ? defaultValue : attribute
381: .getNodeValue();
382: }
383: }
|