0001: package com.meterware.httpunit;
0002:
0003: /********************************************************************************************************************
0004: * $Id: ParsedHTML.java,v 1.63 2006/03/09 01:52:28 russgold Exp $
0005: *
0006: * Copyright (c) 2000-2004, Russell Gold
0007: *
0008: * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
0009: * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
0010: * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
0011: * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
0012: *
0013: * The above copyright notice and this permission notice shall be included in all copies or substantial portions
0014: * of the Software.
0015: *
0016: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
0017: * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
0018: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
0019: * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
0020: * DEALINGS IN THE SOFTWARE.
0021: *
0022: *******************************************************************************************************************/
0023: import org.w3c.dom.Element;
0024: import org.w3c.dom.Node;
0025: import org.w3c.dom.NodeList;
0026: import org.w3c.dom.Document;
0027:
0028: import java.net.URL;
0029: import java.util.*;
0030: import java.io.IOException;
0031:
0032: import com.meterware.httpunit.scripting.ScriptableDelegate;
0033:
0034: /**
0035: * @author <a href="mailto:russgold@httpunit.org">Russell Gold</a>
0036: * @author <a href="mailto:bx@bigfoot.com">Benoit Xhenseval</a>
0037: **/
0038: class ParsedHTML {
0039:
0040: final static private HTMLElement[] NO_ELEMENTS = new HTMLElement[0];
0041:
0042: final static private String[] TEXT_ELEMENTS = { "p", "h1", "h2",
0043: "h3", "h4", "h5", "h6" };
0044:
0045: private Node _rootNode;
0046:
0047: private URL _baseURL;
0048:
0049: private FrameSelector _frame;
0050:
0051: private String _baseTarget;
0052:
0053: private String _characterSet;
0054:
0055: private WebResponse _response;
0056:
0057: private boolean _updateElements = true;
0058:
0059: private boolean _enableNoScriptNodes;
0060:
0061: /** map of element IDs to elements. **/
0062: private HashMap _elementsByID = new HashMap();
0063:
0064: /** map of element names to lists of elements. **/
0065: private HashMap _elementsByName = new HashMap();
0066:
0067: /** map of DOM elements to HTML elements **/
0068: private HashMap _elements = new HashMap();
0069:
0070: private ArrayList _formsList = new ArrayList();
0071: private WebForm[] _forms;
0072: private WebForm _activeForm;
0073:
0074: private ArrayList _imagesList = new ArrayList();
0075: private WebImage[] _images;
0076:
0077: private ArrayList _linkList = new ArrayList();
0078: private WebLink[] _links;
0079:
0080: private ArrayList _blocksList = new ArrayList();
0081: private TextBlock[] _blocks;
0082:
0083: private ArrayList _appletList = new ArrayList();
0084: private WebApplet[] _applets;
0085:
0086: private ArrayList _tableList = new ArrayList();
0087: private WebTable[] _tables;
0088:
0089: private ArrayList _frameList = new ArrayList();
0090: private WebFrame[] _frames;
0091:
/**
 * Creates a view over the DOM rooted at the given node. Nothing is traversed here;
 * the arguments are only stored for later use.
 */
ParsedHTML(WebResponse response, FrameSelector frame, URL baseURL,
        String baseTarget, Node rootNode, String characterSet) {
    _rootNode = rootNode;
    _response = response;
    _frame = frame;
    _baseURL = baseURL;
    _baseTarget = baseTarget;
    _characterSet = characterSet;
}
0101:
0102: /**
0103: * Returns the forms found in the page in the order in which they appear.
0104: **/
0105: public WebForm[] getForms() {
0106: if (_forms == null) {
0107: loadElements();
0108: _forms = (WebForm[]) _formsList
0109: .toArray(new WebForm[_formsList.size()]);
0110: }
0111: return _forms;
0112: }
0113:
0114: /**
0115: * Returns the links found in the page in the order in which they appear.
0116: **/
0117: public WebLink[] getLinks() {
0118: if (_links == null) {
0119: loadElements();
0120: _links = (WebLink[]) _linkList
0121: .toArray(new WebLink[_linkList.size()]);
0122: }
0123: return _links;
0124: }
0125:
0126: /**
0127: * Returns a proxy for each applet found embedded in this page.
0128: */
0129: public WebApplet[] getApplets() {
0130: if (_applets == null) {
0131: loadElements();
0132: _applets = (WebApplet[]) _appletList
0133: .toArray(new WebApplet[_appletList.size()]);
0134: }
0135: return _applets;
0136: }
0137:
0138: /**
0139: * Returns the images found in the page in the order in which they appear.
0140: **/
0141: public WebImage[] getImages() {
0142: if (_images == null) {
0143: loadElements();
0144: _images = (WebImage[]) _imagesList
0145: .toArray(new WebImage[_imagesList.size()]);
0146: }
0147: return _images;
0148: }
0149:
0150: /**
0151: * Returns the top-level block elements found in the page in the order in which they appear.
0152: */
0153: public TextBlock[] getTextBlocks() {
0154: if (_blocks == null) {
0155: loadElements();
0156: _blocks = (TextBlock[]) _blocksList
0157: .toArray(new TextBlock[_blocksList.size()]);
0158: }
0159: return _blocks;
0160: }
0161:
0162: /**
0163: * Returns the first text block found in the page which matches the specified predicate and value.
0164: */
0165: public TextBlock getFirstMatchingTextBlock(
0166: HTMLElementPredicate predicate, Object criteria) {
0167: TextBlock[] blocks = getTextBlocks();
0168: for (int i = 0; i < blocks.length; i++) {
0169: if (predicate.matchesCriteria(blocks[i], criteria))
0170: return blocks[i];
0171: }
0172: return null;
0173: }
0174:
0175: public TextBlock getNextTextBlock(TextBlock block) {
0176: int index = _blocksList.indexOf(block);
0177: if (index < 0 || index == _blocksList.size() - 1)
0178: return null;
0179: return (TextBlock) _blocksList.get(index + 1);
0180: }
0181:
0182: /**
0183: * Returns the top-level tables found in the page in the order in which they appear.
0184: **/
0185: public WebTable[] getTables() {
0186: if (_tables == null) {
0187: loadElements();
0188: _tables = (WebTable[]) _tableList
0189: .toArray(new WebTable[_tableList.size()]);
0190: }
0191: return _tables;
0192: }
0193:
0194: /**
0195: * Returns the HTMLElement with the specified ID.
0196: */
0197: public HTMLElement getElementWithID(String id) {
0198: return (HTMLElement) getElementWithID(id, HTMLElement.class);
0199: }
0200:
0201: /**
0202: * Returns the HTML elements with the specified name.
0203: */
0204: public HTMLElement[] getElementsWithName(String name) {
0205: loadElements();
0206: ArrayList elements = (ArrayList) _elementsByName.get(name);
0207: return elements == null ? NO_ELEMENTS
0208: : (HTMLElement[]) elements
0209: .toArray(new HTMLElement[elements.size()]);
0210: }
0211:
0212: /**
0213: * Returns the HTML elements with an attribute with the specified name and value.
0214: */
0215: public HTMLElement[] getElementsWithAttribute(String name,
0216: String value) {
0217: loadElements();
0218: ArrayList elements = new ArrayList();
0219: for (Iterator i = _elements.values().iterator(); i.hasNext();) {
0220: HTMLElement element = (HTMLElement) i.next();
0221: if (value.equals(element.getAttribute(name)))
0222: elements.add(element);
0223: }
0224: return (HTMLElement[]) elements
0225: .toArray(new HTMLElement[elements.size()]);
0226: }
0227:
0228: /**
0229: * Returns a list of HTML element names contained in this HTML section.
0230: */
0231: public String[] getElementNames() {
0232: loadElements();
0233: return (String[]) _elementsByName.keySet().toArray(
0234: new String[_elementsByName.size()]);
0235: }
0236:
0237: HTMLElement[] getElementsByTagName(Node dom, String name) {
0238: loadElements();
0239: if (dom instanceof Element) {
0240: return getElementsFromList(((Element) dom)
0241: .getElementsByTagName(name));
0242: } else {
0243: return getElementsFromList(((Document) dom)
0244: .getElementsByTagName(name));
0245: }
0246: }
0247:
0248: private HTMLElement[] getElementsFromList(NodeList nl) {
0249: HTMLElement[] elements = new HTMLElement[nl.getLength()];
0250: for (int i = 0; i < elements.length; i++) {
0251: Node node = nl.item(i);
0252: elements[i] = (HTMLElement) _elements.get(node);
0253: if (elements[i] == null) {
0254: elements[i] = toDefaultElement((Element) node);
0255: _elements.put(node, elements[i]);
0256: }
0257: }
0258: return elements;
0259: }
0260:
0261: /**
0262: * Returns the form found in the page with the specified ID.
0263: **/
0264: public WebForm getFormWithID(String id) {
0265: return (WebForm) getElementWithID(id, WebForm.class);
0266: }
0267:
0268: /**
0269: * Returns the link found in the page with the specified ID.
0270: **/
0271: public WebLink getLinkWithID(String id) {
0272: return (WebLink) getElementWithID(id, WebLink.class);
0273:
0274: }
0275:
0276: private Object getElementWithID(String id, final Class klass) {
0277: loadElements();
0278: return whenCast(_elementsByID.get(id), klass);
0279: }
0280:
0281: private Object whenCast(Object o, Class klass) {
0282: return klass.isInstance(o) ? o : null;
0283: }
0284:
0285: /**
0286: * Returns the first link found in the page matching the specified criteria.
0287: **/
0288: public WebForm getFirstMatchingForm(HTMLElementPredicate predicate,
0289: Object criteria) {
0290: WebForm[] forms = getForms();
0291: for (int i = 0; i < forms.length; i++) {
0292: if (predicate.matchesCriteria(forms[i], criteria))
0293: return forms[i];
0294: }
0295: return null;
0296: }
0297:
0298: /**
0299: * Returns all links found in the page matching the specified criteria.
0300: **/
0301: public WebForm[] getMatchingForms(HTMLElementPredicate predicate,
0302: Object criteria) {
0303: ArrayList matches = new ArrayList();
0304: WebForm[] forms = getForms();
0305: for (int i = 0; i < forms.length; i++) {
0306: if (predicate.matchesCriteria(forms[i], criteria))
0307: matches.add(forms[i]);
0308: }
0309: return (WebForm[]) matches.toArray(new WebForm[matches.size()]);
0310: }
0311:
0312: /**
0313: * Returns the form found in the page with the specified name.
0314: **/
0315: public WebForm getFormWithName(String name) {
0316: return getFirstMatchingForm(WebForm.MATCH_NAME, name);
0317: }
0318:
0319: private void interpretScriptElement(Element element) {
0320: String script = getScript(element);
0321: if (script != null) {
0322: try {
0323: _updateElements = false;
0324: String language = NodeUtils.getNodeAttribute(element,
0325: "language", null);
0326: if (!getResponse().getScriptableObject()
0327: .supportsScript(language))
0328: _enableNoScriptNodes = true;
0329: getResponse().getScriptableObject().runScript(language,
0330: script);
0331: } finally {
0332: setRootNode(_rootNode);
0333: }
0334: }
0335: }
0336:
0337: private String getScript(Node scriptNode) {
0338: String scriptLocation = NodeUtils.getNodeAttribute(scriptNode,
0339: "src", null);
0340: if (scriptLocation == null) {
0341: return NodeUtils.asText(scriptNode.getChildNodes());
0342: } else {
0343: try {
0344: return getIncludedScript(scriptLocation);
0345: } catch (IOException e) {
0346: throw new RuntimeException(
0347: "Error loading included script: " + e);
0348: }
0349: }
0350: }
0351:
0352: /**
0353: * Returns the contents of an included script, given its src attribute.
0354: * @param srcAttribute
0355: * @return the contents of the script.
0356: * @throws java.io.IOException if there is a problem retrieving the script
0357: */
0358: String getIncludedScript(String srcAttribute) throws IOException {
0359: WebRequest req = new GetMethodWebRequest(getBaseURL(),
0360: srcAttribute);
0361: WebWindow window = getResponse().getWindow();
0362: if (window == null)
0363: throw new IllegalStateException(
0364: "Unable to retrieve script included by this response, since it was loaded by getResource(). Use getResponse() instead.");
0365: return window.getResource(req).getText();
0366: }
0367:
0368: /**
0369: * If noscript node content is enabled, returns null - otherwise returns a concealing element.
0370: */
0371: private HTMLElement toNoscriptElement(Element element) {
0372: return _enableNoScriptNodes ? null : new NoScriptElement(
0373: element);
0374: }
0375:
0376: static class HtmlElementRecorder {
0377:
0378: protected void recordHtmlElement(
0379: NodeUtils.PreOrderTraversal pot, Node node,
0380: HTMLElement htmlElement) {
0381: if (htmlElement != null) {
0382: addToMaps(pot, node, htmlElement);
0383: addToLists(pot, htmlElement);
0384: }
0385: }
0386:
0387: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0388: HTMLElement htmlElement) {
0389: for (Iterator i = pot.getContexts(); i.hasNext();) {
0390: Object o = i.next();
0391: if (o instanceof ParsedHTML)
0392: ((ParsedHTML) o).addToList(htmlElement);
0393: }
0394: }
0395:
0396: protected void addToMaps(NodeUtils.PreOrderTraversal pot,
0397: Node node, HTMLElement htmlElement) {
0398: for (Iterator i = pot.getContexts(); i.hasNext();) {
0399: Object o = i.next();
0400: if (o instanceof ParsedHTML)
0401: ((ParsedHTML) o).addToMaps(node, htmlElement);
0402: }
0403: }
0404:
0405: }
0406:
0407: abstract static class HTMLElementFactory extends
0408: HtmlElementRecorder {
0409: abstract HTMLElement toHTMLElement(
0410: NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML,
0411: Element element);
0412:
0413: void recordElement(NodeUtils.PreOrderTraversal pot,
0414: Element element, ParsedHTML parsedHTML) {
0415: HTMLElement htmlElement = toHTMLElement(pot, parsedHTML,
0416: element);
0417: recordHtmlElement(pot, element, htmlElement);
0418: }
0419:
0420: protected boolean isRecognized(ClientProperties properties) {
0421: return true;
0422: }
0423:
0424: protected boolean addToContext() {
0425: return false;
0426: }
0427:
0428: final protected ParsedHTML getParsedHTML(
0429: NodeUtils.PreOrderTraversal pot) {
0430: return (ParsedHTML) getClosestContext(pot, ParsedHTML.class);
0431: }
0432:
0433: final protected Object getClosestContext(
0434: NodeUtils.PreOrderTraversal pot, Class aClass) {
0435: return pot.getClosestContext(aClass);
0436: }
0437:
0438: protected ParsedHTML getRootContext(
0439: NodeUtils.PreOrderTraversal pot) {
0440: return (ParsedHTML) pot.getRootContext();
0441: }
0442: }
0443:
0444: static class DefaultElementFactory extends HTMLElementFactory {
0445:
0446: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0447: ParsedHTML parsedHTML, Element element) {
0448: if (element.getAttribute("id").equals(""))
0449: return null;
0450: return parsedHTML.toDefaultElement(element);
0451: }
0452:
0453: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0454: HTMLElement htmlElement) {
0455: }
0456: }
0457:
0458: private HTMLElement toDefaultElement(Element element) {
0459: return new HTMLElementBase(element) {
0460: protected ScriptableDelegate newScriptable() {
0461: return new HTMLElementScriptable(this );
0462: }
0463:
0464: protected ScriptableDelegate getParentDelegate() {
0465: return getResponse().getScriptableObject()
0466: .getDocument();
0467: }
0468: };
0469: }
0470:
0471: static class WebFormFactory extends HTMLElementFactory {
0472: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0473: ParsedHTML parsedHTML, Element element) {
0474: return parsedHTML.toWebForm(element);
0475: }
0476:
0477: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0478: HTMLElement htmlElement) {
0479: super .addToLists(pot, htmlElement);
0480: getRootContext(pot)._activeForm = (WebForm) htmlElement;
0481: }
0482: }
0483:
0484: static class WebLinkFactory extends HTMLElementFactory {
0485: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0486: ParsedHTML parsedHTML, Element element) {
0487: return parsedHTML.toLinkAnchor(element);
0488: }
0489: }
0490:
0491: static class TextBlockFactory extends HTMLElementFactory {
0492: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0493: ParsedHTML parsedHTML, Element element) {
0494: return parsedHTML.toTextBlock(element);
0495: }
0496:
0497: protected boolean addToContext() {
0498: return true;
0499: }
0500:
0501: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0502: HTMLElement htmlElement) {
0503: for (Iterator i = pot.getContexts(); i.hasNext();) {
0504: Object o = i.next();
0505: if (!(o instanceof ParsedHTML))
0506: continue;
0507: ((ParsedHTML) o).addToList(htmlElement);
0508: break;
0509: }
0510: }
0511:
0512: }
0513:
0514: static class ScriptFactory extends HTMLElementFactory {
0515:
0516: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0517: ParsedHTML parsedHTML, Element element) {
0518: return null;
0519: }
0520:
0521: void recordElement(NodeUtils.PreOrderTraversal pot,
0522: Element element, ParsedHTML parsedHTML) {
0523: parsedHTML.interpretScriptElement(element);
0524: }
0525: }
0526:
0527: static class NoScriptFactory extends HTMLElementFactory {
0528:
0529: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0530: ParsedHTML parsedHTML, Element element) {
0531: return parsedHTML.toNoscriptElement(element);
0532: }
0533:
0534: protected boolean addToContext() {
0535: return true;
0536: }
0537: }
0538:
0539: static class WebFrameFactory extends HTMLElementFactory {
0540: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0541: ParsedHTML parsedHTML, Element element) {
0542: return parsedHTML.toWebFrame(element);
0543: }
0544: }
0545:
0546: static class WebIFrameFactory extends HTMLElementFactory {
0547: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0548: ParsedHTML parsedHTML, Element element) {
0549: return parsedHTML.toWebIFrame(element);
0550: }
0551:
0552: protected boolean isRecognized(ClientProperties properties) {
0553: return properties.isIframeSupported();
0554: }
0555:
0556: protected boolean addToContext() {
0557: return true;
0558: }
0559: }
0560:
0561: static class WebImageFactory extends HTMLElementFactory {
0562: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0563: ParsedHTML parsedHTML, Element element) {
0564: return parsedHTML.toWebImage(element);
0565: }
0566: }
0567:
0568: static class WebAppletFactory extends HTMLElementFactory {
0569: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0570: ParsedHTML parsedHTML, Element element) {
0571: return parsedHTML.toWebApplet(element);
0572: }
0573:
0574: protected boolean addToContext() {
0575: return true;
0576: }
0577: }
0578:
0579: static class WebTableFactory extends HTMLElementFactory {
0580: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0581: ParsedHTML parsedHTML, Element element) {
0582: return parsedHTML.toWebTable(element);
0583: }
0584:
0585: protected boolean addToContext() {
0586: return true;
0587: }
0588:
0589: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0590: HTMLElement htmlElement) {
0591: for (Iterator i = pot.getContexts(); i.hasNext();) {
0592: Object o = i.next();
0593: if (o instanceof ParsedHTML)
0594: ((ParsedHTML) o).addToList(htmlElement);
0595: if (o instanceof TableCell)
0596: break;
0597: }
0598: }
0599: }
0600:
0601: static class TableRowFactory extends HTMLElementFactory {
0602: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0603: ParsedHTML parsedHTML, Element element) {
0604: WebTable wt = getWebTable(pot);
0605: if (wt == null)
0606: return null;
0607: return wt.newTableRow(element);
0608: }
0609:
0610: private WebTable getWebTable(NodeUtils.PreOrderTraversal pot) {
0611: return (WebTable) getClosestContext(pot, WebTable.class);
0612: }
0613:
0614: protected boolean addToContext() {
0615: return true;
0616: }
0617:
0618: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0619: HTMLElement htmlElement) {
0620: getWebTable(pot).addRow((TableRow) htmlElement);
0621: }
0622: }
0623:
0624: static class TableCellFactory extends HTMLElementFactory {
0625: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0626: ParsedHTML parsedHTML, Element element) {
0627: TableRow tr = getTableRow(pot);
0628: if (tr == null)
0629: return null;
0630: return tr.newTableCell(element);
0631: }
0632:
0633: private TableRow getTableRow(NodeUtils.PreOrderTraversal pot) {
0634: return (TableRow) getClosestContext(pot, TableRow.class);
0635: }
0636:
0637: protected boolean addToContext() {
0638: return true;
0639: }
0640:
0641: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0642: HTMLElement htmlElement) {
0643: getTableRow(pot).addTableCell((TableCell) htmlElement);
0644: }
0645: }
0646:
0647: static class FormControlFactory extends HTMLElementFactory {
0648:
0649: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0650: ParsedHTML parsedHTML, Element element) {
0651: final WebForm form = getForm(pot);
0652: return form == null ? newControlWithoutForm(parsedHTML,
0653: element) : form.newFormControl(element);
0654: }
0655:
0656: private HTMLElement newControlWithoutForm(
0657: ParsedHTML parsedHTML, Element element) {
0658: if ((element.getNodeName().equalsIgnoreCase("button") || element
0659: .getNodeName().equalsIgnoreCase("input"))
0660: && isValidNonFormButtonType(NodeUtils
0661: .getNodeAttribute(element, "type"))) {
0662: return parsedHTML.toButtonWithoutForm(element);
0663: } else {
0664: return null;
0665: }
0666: }
0667:
0668: private boolean isValidNonFormButtonType(String buttonType) {
0669: return buttonType.equals("")
0670: || buttonType.equalsIgnoreCase("button");
0671: }
0672:
0673: private WebForm getForm(NodeUtils.PreOrderTraversal pot) {
0674: return getRootContext(pot)._activeForm;
0675: }
0676:
0677: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0678: HTMLElement htmlElement) {
0679: WebForm form = getForm(pot);
0680: if (form != null)
0681: form.addFormControl((FormControl) htmlElement);
0682: }
0683: }
0684:
0685: static class WebListFactory extends HTMLElementFactory {
0686: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0687: ParsedHTML parsedHTML, Element element) {
0688: return parsedHTML.toOrderedList(element);
0689: }
0690:
0691: protected boolean addToContext() {
0692: return true;
0693: }
0694:
0695: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0696: HTMLElement htmlElement) {
0697: TextBlock textBlock = getTextBlock(pot);
0698: if (textBlock != null)
0699: textBlock.addList((WebList) htmlElement);
0700: }
0701:
0702: private TextBlock getTextBlock(NodeUtils.PreOrderTraversal pot) {
0703: return (TextBlock) getClosestContext(pot, TextBlock.class);
0704: }
0705: }
0706:
0707: static class ListItemFactory extends HTMLElementFactory {
0708: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0709: ParsedHTML parsedHTML, Element element) {
0710: WebList webList = getWebList(pot);
0711: if (webList == null)
0712: return null;
0713: return webList.addNewItem(element);
0714: }
0715:
0716: private WebList getWebList(NodeUtils.PreOrderTraversal pot) {
0717: return (WebList) getClosestContext(pot, WebList.class);
0718: }
0719:
0720: protected boolean addToContext() {
0721: return true;
0722: }
0723:
0724: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0725: HTMLElement htmlElement) {
0726: }
0727: }
0728:
0729: private static HashMap _htmlFactoryClasses = new HashMap();
0730: private static HTMLElementFactory _defaultFactory = new DefaultElementFactory();
0731:
0732: static {
0733: _htmlFactoryClasses.put("a", new WebLinkFactory());
0734: _htmlFactoryClasses.put("area", new WebLinkFactory());
0735: _htmlFactoryClasses.put("form", new WebFormFactory());
0736: _htmlFactoryClasses.put("img", new WebImageFactory());
0737: _htmlFactoryClasses.put("applet", new WebAppletFactory());
0738: _htmlFactoryClasses.put("table", new WebTableFactory());
0739: _htmlFactoryClasses.put("tr", new TableRowFactory());
0740: _htmlFactoryClasses.put("td", new TableCellFactory());
0741: _htmlFactoryClasses.put("th", new TableCellFactory());
0742: _htmlFactoryClasses.put("frame", new WebFrameFactory());
0743: _htmlFactoryClasses.put("iframe", new WebIFrameFactory());
0744: _htmlFactoryClasses.put("script", new ScriptFactory());
0745: _htmlFactoryClasses.put("noscript", new NoScriptFactory());
0746: _htmlFactoryClasses.put("ol", new WebListFactory());
0747: _htmlFactoryClasses.put("ul", new WebListFactory());
0748: _htmlFactoryClasses.put("li", new ListItemFactory());
0749:
0750: for (int i = 0; i < TEXT_ELEMENTS.length; i++) {
0751: _htmlFactoryClasses.put(TEXT_ELEMENTS[i],
0752: new TextBlockFactory());
0753: }
0754:
0755: for (Iterator i = Arrays.asList(
0756: FormControl.getControlElementTags()).iterator(); i
0757: .hasNext();) {
0758: _htmlFactoryClasses.put(i.next(), new FormControlFactory());
0759: }
0760: }
0761:
0762: private static HTMLElementFactory getHTMLElementFactory(
0763: String tagName) {
0764: final HTMLElementFactory factory = (HTMLElementFactory) _htmlFactoryClasses
0765: .get(tagName);
0766: return factory != null ? factory : _defaultFactory;
0767: }
0768:
0769: private void loadElements() {
0770: if (!_updateElements)
0771: return;
0772:
0773: NodeUtils.NodeAction action = new NodeUtils.NodeAction() {
0774: public boolean processElement(
0775: NodeUtils.PreOrderTraversal pot, Element element) {
0776: HTMLElementFactory factory = getHTMLElementFactory(element
0777: .getNodeName().toLowerCase());
0778: if (factory == null
0779: || !factory.isRecognized(getClientProperties()))
0780: return true;
0781: if (pot.getClosestContext(ContentConcealer.class) != null)
0782: return true;
0783:
0784: if (!_elements.containsKey(element))
0785: factory
0786: .recordElement(pot, element,
0787: ParsedHTML.this );
0788: if (factory.addToContext())
0789: pot.pushContext(_elements.get(element));
0790:
0791: return true;
0792: }
0793:
0794: public void processTextNode(
0795: NodeUtils.PreOrderTraversal pot, Node textNode) {
0796: if (textNode.getNodeValue().trim().length() == 0)
0797: return;
0798:
0799: Node parent = textNode.getParentNode();
0800: if (!parent.getNodeName().equalsIgnoreCase("body"))
0801: return;
0802: if (pot.getClosestContext(ContentConcealer.class) != null)
0803: return;
0804: new HtmlElementRecorder().recordHtmlElement(pot,
0805: textNode, newTextBlock(textNode));
0806: }
0807: };
0808: NodeUtils.PreOrderTraversal nt = new NodeUtils.PreOrderTraversal(
0809: getRootNode());
0810: nt.pushBaseContext(this );
0811: nt.perform(action);
0812:
0813: _updateElements = false;
0814: }
0815:
0816: private ClientProperties getClientProperties() {
0817: WebWindow window = _response.getWindow();
0818: return window == null ? ClientProperties.getDefaultProperties()
0819: : window.getClient().getClientProperties();
0820: }
0821:
0822: private Button toButtonWithoutForm(Element element) {
0823: return new Button(_response, element);
0824: }
0825:
0826: private WebForm toWebForm(Element element) {
0827: return new WebForm(_response, _baseURL, element, _frame,
0828: _baseTarget, _characterSet);
0829: }
0830:
0831: private WebFrame toWebFrame(Element element) {
0832: return new WebFrame(_response, _baseURL, element, _frame);
0833: }
0834:
0835: private WebFrame toWebIFrame(Element element) {
0836: return new WebIFrame(_baseURL, element, _frame);
0837: }
0838:
0839: private WebLink toLinkAnchor(Element child) {
0840: return (!isWebLink(child)) ? null : new WebLink(_response,
0841: _baseURL, child, _frame, _baseTarget, _characterSet);
0842: }
0843:
0844: private boolean isWebLink(Node node) {
0845: return (node.getAttributes().getNamedItem("href") != null);
0846: }
0847:
0848: private WebImage toWebImage(Element child) {
0849: return new WebImage(_response, this , _baseURL, child, _frame,
0850: _baseTarget, _characterSet);
0851: }
0852:
0853: private WebApplet toWebApplet(Element element) {
0854: return new WebApplet(_response, element, _baseTarget);
0855: }
0856:
0857: private WebTable toWebTable(Element element) {
0858: return new WebTable(_response, _frame, element, _baseURL,
0859: _baseTarget, _characterSet);
0860: }
0861:
0862: private TextBlock toTextBlock(Element element) {
0863: return new TextBlock(_response, _frame, _baseURL, _baseTarget,
0864: element, _characterSet);
0865: }
0866:
0867: private TextBlock newTextBlock(Node textNode) {
0868: return new TextBlock(_response, _frame, _baseURL, _baseTarget,
0869: textNode, _characterSet);
0870: }
0871:
0872: private WebList toOrderedList(Element element) {
0873: return new WebList(_response, _frame, _baseURL, _baseTarget,
0874: element, _characterSet);
0875: }
0876:
0877: private void addToMaps(Node node, HTMLElement htmlElement) {
0878: _elements.put(node, htmlElement);
0879: if (htmlElement.getID() != null)
0880: _elementsByID.put(htmlElement.getID(), htmlElement);
0881: if (htmlElement.getName() != null)
0882: addNamedElement(htmlElement.getName(), htmlElement);
0883: }
0884:
0885: private void addNamedElement(String name, HTMLElement htmlElement) {
0886: List list = (List) _elementsByName.get(name);
0887: if (list == null)
0888: _elementsByName.put(name, list = new ArrayList());
0889: list.add(htmlElement);
0890: }
0891:
0892: private void addToList(HTMLElement htmlElement) {
0893: ArrayList list = getListForElement(htmlElement);
0894: if (list != null)
0895: list.add(htmlElement);
0896: }
0897:
0898: private ArrayList getListForElement(HTMLElement element) {
0899: if (element instanceof WebLink)
0900: return _linkList;
0901: if (element instanceof WebForm)
0902: return _formsList;
0903: if (element instanceof WebImage)
0904: return _imagesList;
0905: if (element instanceof WebApplet)
0906: return _appletList;
0907: if (element instanceof WebTable)
0908: return _tableList;
0909: if (element instanceof WebFrame)
0910: return _frameList;
0911: if (element instanceof BlockElement)
0912: return _blocksList;
0913: return null;
0914: }
0915:
0916: /**
0917: * Returns the first link which contains the specified text.
0918: **/
0919: public WebLink getLinkWith(String text) {
0920: return getFirstMatchingLink(WebLink.MATCH_CONTAINED_TEXT, text);
0921: }
0922:
0923: /**
0924: * Returns the link which contains the first image with the specified text as its 'alt' attribute.
0925: **/
0926: public WebLink getLinkWithImageText(String text) {
0927: WebImage image = getImageWithAltText(text);
0928: return image == null ? null : image.getLink();
0929: }
0930:
0931: /**
0932: * Returns the link found in the page with the specified name.
0933: **/
0934: public WebLink getLinkWithName(String name) {
0935: return getFirstMatchingLink(WebLink.MATCH_NAME, name);
0936: }
0937:
0938: /**
0939: * Returns the first link found in the page matching the specified criteria.
0940: **/
0941: public WebLink getFirstMatchingLink(HTMLElementPredicate predicate,
0942: Object criteria) {
0943: WebLink[] links = getLinks();
0944: for (int i = 0; i < links.length; i++) {
0945: if (predicate.matchesCriteria(links[i], criteria))
0946: return links[i];
0947: }
0948: return null;
0949: }
0950:
0951: /**
0952: * Returns all links found in the page matching the specified criteria.
0953: **/
0954: public WebLink[] getMatchingLinks(HTMLElementPredicate predicate,
0955: Object criteria) {
0956: ArrayList matches = new ArrayList();
0957: WebLink[] links = getLinks();
0958: for (int i = 0; i < links.length; i++) {
0959: if (predicate.matchesCriteria(links[i], criteria))
0960: matches.add(links[i]);
0961: }
0962: return (WebLink[]) matches.toArray(new WebLink[matches.size()]);
0963: }
0964:
0965: /**
0966: * Returns the image found in the page with the specified name.
0967: **/
0968: public WebImage getImageWithName(String name) {
0969: WebImage[] images = getImages();
0970: for (int i = 0; i < images.length; i++) {
0971: if (HttpUnitUtils.matches(name, images[i].getName()))
0972: return images[i];
0973: }
0974: return null;
0975: }
0976:
0977: /**
0978: * Returns the first image found in the page with the specified src attribute.
0979: **/
0980: public WebImage getImageWithSource(String source) {
0981: WebImage[] images = getImages();
0982: for (int i = 0; i < images.length; i++) {
0983: if (HttpUnitUtils.matches(source, images[i].getSource()))
0984: return images[i];
0985: }
0986: return null;
0987: }
0988:
0989: /**
0990: * Returns the first image found in the page with the specified alt attribute.
0991: **/
0992: public WebImage getImageWithAltText(String altText) {
0993: WebImage[] images = getImages();
0994: for (int i = 0; i < images.length; i++) {
0995: if (HttpUnitUtils.matches(altText, images[i].getAltText()))
0996: return images[i];
0997: }
0998: return null;
0999: }
1000:
1001: /**
1002: * Returns the first table in the response which matches the specified predicate and value.
1003: * Will recurse into any nested tables, as needed.
1004: * @return the selected table, or null if none is found
1005: **/
1006: public WebTable getFirstMatchingTable(
1007: HTMLElementPredicate predicate, Object criteria) {
1008: return getTableSatisfyingPredicate(getTables(), predicate,
1009: criteria);
1010: }
1011:
1012: /**
1013: * Returns the tables in the response which match the specified predicate and value.
1014: * Will recurse into any nested tables, as needed.
1015: * @return the selected tables, or null if none are found
1016: **/
1017: public WebTable[] getMatchingTables(HTMLElementPredicate predicate,
1018: Object criteria) {
1019: return getTablesSatisfyingPredicate(getTables(), predicate,
1020: criteria);
1021: }
1022:
1023: /**
1024: * Returns the first table in the response which has the specified text as the full text of
1025: * its first non-blank row and non-blank column. Will recurse into any nested tables, as needed.
1026: * @return the selected table, or null if none is found
1027: **/
1028: public WebTable getTableStartingWith(String text) {
1029: return getFirstMatchingTable(
1030: WebTable.MATCH_FIRST_NONBLANK_CELL, text);
1031: }
1032:
1033: /**
1034: * Returns the first table in the response which has the specified text as a prefix of the text
1035: * in its first non-blank row and non-blank column. Will recurse into any nested tables, as needed.
1036: * @return the selected table, or null if none is found
1037: **/
1038: public WebTable getTableStartingWithPrefix(String text) {
1039: return getFirstMatchingTable(
1040: WebTable.MATCH_FIRST_NONBLANK_CELL_PREFIX, text);
1041: }
1042:
1043: /**
1044: * Returns the first table in the response which has the specified text as its summary attribute.
1045: * Will recurse into any nested tables, as needed.
1046: * @return the selected table, or null if none is found
1047: **/
1048: public WebTable getTableWithSummary(String summary) {
1049: return getFirstMatchingTable(WebTable.MATCH_SUMMARY, summary);
1050: }
1051:
1052: /**
1053: * Returns the first table in the response which has the specified text as its ID attribute.
1054: * Will recurse into any nested tables, as needed.
1055: * @return the selected table, or null if none is found
1056: **/
1057: public WebTable getTableWithID(String ID) {
1058: return getFirstMatchingTable(WebTable.MATCH_ID, ID);
1059: }
1060:
1061: /**
1062: * Returns a copy of the domain object model associated with this page.
1063: **/
1064: public Node getDOM() {
1065: return getRootNode().cloneNode( /* deep */true);
1066: }
1067:
1068: //---------------------------------- Object methods --------------------------------
1069:
1070: public String toString() {
1071: return _baseURL.toExternalForm()
1072: + System.getProperty("line.separator") + _rootNode;
1073: }
1074:
1075: //---------------------------------- package members --------------------------------
1076:
1077: /**
1078: * Specifies the root node for this HTML fragment.
1079: */
1080: void setRootNode(Node rootNode) {
1081: if (_rootNode != null && rootNode != _rootNode)
1082: throw new IllegalStateException(
1083: "The root node has already been defined as "
1084: + _rootNode
1085: + " and cannot be redefined as " + rootNode);
1086: _rootNode = rootNode;
1087: _links = null;
1088: _forms = null;
1089: _images = null;
1090: _applets = null;
1091: _tables = null;
1092: _frames = null;
1093: _blocks = null;
1094: _updateElements = true;
1095: }
1096:
1097: /**
1098: * Returns the base URL for this HTML segment.
1099: **/
1100: URL getBaseURL() {
1101: return _baseURL;
1102: }
1103:
1104: WebResponse getResponse() {
1105: return _response;
1106: }
1107:
1108: /**
1109: * Returns the domain object model associated with this page, to be used internally.
1110: **/
1111: Node getOriginalDOM() {
1112: return getRootNode();
1113: }
1114:
1115: /**
1116: * Returns the frames found in the page in the order in which they appear.
1117: **/
1118: public WebFrame[] getFrames() {
1119: if (_frames == null) {
1120: loadElements();
1121: _frames = (WebFrame[]) _frameList
1122: .toArray(new WebFrame[_frameList.size()]);
1123: }
1124: return _frames;
1125: }
1126:
1127: //---------------------------------- private members --------------------------------
1128:
1129: Node getRootNode() {
1130: if (_rootNode == null)
1131: throw new IllegalStateException(
1132: "The root node has not been specified");
1133: return _rootNode;
1134: }
1135:
1136: /**
1137: * Returns the table with the specified text in its summary attribute.
1138: **/
1139: private WebTable getTableSatisfyingPredicate(WebTable[] tables,
1140: HTMLElementPredicate predicate, Object value) {
1141: for (int i = 0; i < tables.length; i++) {
1142: if (predicate.matchesCriteria(tables[i], value)) {
1143: return tables[i];
1144: } else {
1145: for (int j = 0; j < tables[i].getRowCount(); j++) {
1146: for (int k = 0; k < tables[i].getColumnCount(); k++) {
1147: TableCell cell = tables[i].getTableCell(j, k);
1148: if (cell != null) {
1149: WebTable[] innerTables = cell.getTables();
1150: if (innerTables.length != 0) {
1151: WebTable result = getTableSatisfyingPredicate(
1152: innerTables, predicate, value);
1153: if (result != null)
1154: return result;
1155: }
1156: }
1157: }
1158: }
1159: }
1160: }
1161: return null;
1162: }
1163:
1164: /**
1165: * Returns the tables which match the specified criteria.
1166: **/
1167: private WebTable[] getTablesSatisfyingPredicate(WebTable[] tables,
1168: HTMLElementPredicate predicate, Object value) {
1169: ArrayList matches = new ArrayList();
1170: for (int i = 0; i < tables.length; i++) {
1171: if (predicate.matchesCriteria(tables[i], value)) {
1172: matches.add(tables[i]);
1173: }
1174: for (int j = 0; j < tables[i].getRowCount(); j++) {
1175: for (int k = 0; k < tables[i].getColumnCount(); k++) {
1176: TableCell cell = tables[i].getTableCell(j, k);
1177: if (cell != null) {
1178: WebTable[] innerTables = cell.getTables();
1179: if (innerTables.length != 0) {
1180: WebTable[] result = getTablesSatisfyingPredicate(
1181: innerTables, predicate, value);
1182: if (result != null && result.length > 0) {
1183: for (int l = 0; l < result.length; l++) {
1184: matches.add(result[l]);
1185: }
1186: }
1187: }
1188: }
1189: }
1190: }
1191: }
1192: if (matches.size() > 0) {
1193: return (WebTable[]) matches.toArray(new WebTable[matches
1194: .size()]);
1195: } else {
1196: return null;
1197: }
1198: }
1199:
1200: class WebIFrame extends WebFrame implements ContentConcealer {
1201:
1202: public WebIFrame(URL baseURL, Node frameNode,
1203: FrameSelector parentFrame) {
1204: super (_response, baseURL, frameNode, parentFrame);
1205: }
1206: }
1207:
1208: class NoScriptElement extends HTMLElementBase implements
1209: ContentConcealer {
1210:
1211: public NoScriptElement(Node node) {
1212: super (node);
1213: }
1214:
1215: protected ScriptableDelegate newScriptable() {
1216: return null;
1217: }
1218:
1219: protected ScriptableDelegate getParentDelegate() {
1220: return null;
1221: }
1222: }
1223:
1224: }
|