0001: package com.meterware.httpunit;
0002:
0003: /********************************************************************************************************************
0004: * $Id: ParsedHTML.java 514405 2007-03-04 15:27:59Z frankbille $
0005: *
0006: * Copyright (c) 2000-2004, Russell Gold
0007: *
0008: * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
0009: * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
0010: * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
0011: * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
0012: *
0013: * The above copyright notice and this permission notice shall be included in all copies or substantial portions
0014: * of the Software.
0015: *
0016: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
0017: * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
0018: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
0019: * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
0020: * DEALINGS IN THE SOFTWARE.
0021: *
0022: *******************************************************************************************************************/
0023: import java.io.IOException;
0024: import java.net.URL;
0025: import java.util.ArrayList;
0026: import java.util.Arrays;
0027: import java.util.HashMap;
0028: import java.util.Iterator;
0029: import java.util.List;
0030:
0031: import org.w3c.dom.Document;
0032: import org.w3c.dom.Element;
0033: import org.w3c.dom.Node;
0034: import org.w3c.dom.NodeList;
0035:
0036: import com.meterware.httpunit.scripting.ScriptableDelegate;
0037:
0038: /**
0039: * @author <a href="mailto:russgold@httpunit.org">Russell Gold</a>
0040: * @author <a href="mailto:bx@bigfoot.com">Benoit Xhenseval</a>
0041: */
0042: class ParsedHTML {
0043:
0044: final static private HTMLElement[] NO_ELEMENTS = new HTMLElement[0];
0045:
0046: final static private String[] TEXT_ELEMENTS = { "p", "h1", "h2",
0047: "h3", "h4", "h5", "h6" };
0048:
0049: private Node _rootNode;
0050:
0051: private URL _baseURL;
0052:
0053: private FrameSelector _frame;
0054:
0055: private String _baseTarget;
0056:
0057: private String _characterSet;
0058:
0059: private WebResponse _response;
0060:
0061: private boolean _updateElements = true;
0062:
0063: private boolean _enableNoScriptNodes;
0064:
0065: /** map of element IDs to elements. * */
0066: private HashMap _elementsByID = new HashMap();
0067:
0068: /** map of element names to lists of elements. * */
0069: private HashMap _elementsByName = new HashMap();
0070:
0071: /** map of DOM elements to HTML elements * */
0072: private HashMap _elements = new HashMap();
0073:
0074: private ArrayList _formsList = new ArrayList();
0075: private WebForm[] _forms;
0076: private WebForm _activeForm;
0077:
0078: private ArrayList _imagesList = new ArrayList();
0079: private WebImage[] _images;
0080:
0081: private ArrayList _linkList = new ArrayList();
0082: private WebLink[] _links;
0083:
0084: private ArrayList _blocksList = new ArrayList();
0085: private TextBlock[] _blocks;
0086:
0087: private ArrayList _appletList = new ArrayList();
0088: private WebApplet[] _applets;
0089:
0090: private ArrayList _tableList = new ArrayList();
0091: private WebTable[] _tables;
0092:
0093: private ArrayList _frameList = new ArrayList();
0094: private WebFrame[] _frames;
0095:
/**
 * Stores the supplied response, frame, base URL, base target, root node and
 * character set. No traversal of the DOM happens here.
 */
ParsedHTML(WebResponse response, FrameSelector frame, URL baseURL,
        String baseTarget, Node rootNode, String characterSet) {
    this._response = response;
    this._frame = frame;
    this._baseURL = baseURL;
    this._baseTarget = baseTarget;
    this._rootNode = rootNode;
    this._characterSet = characterSet;
}
0105:
0106: /**
0107: * Returns the forms found in the page in the order in which they appear.
0108: *
0109: * @return Forms
0110: */
0111: public WebForm[] getForms() {
0112: if (_forms == null) {
0113: loadElements();
0114: _forms = (WebForm[]) _formsList
0115: .toArray(new WebForm[_formsList.size()]);
0116: }
0117: return _forms;
0118: }
0119:
0120: /**
0121: * Returns the links found in the page in the order in which they appear.
0122: *
0123: * @return Links
0124: */
0125: public WebLink[] getLinks() {
0126: if (_links == null) {
0127: loadElements();
0128: _links = (WebLink[]) _linkList
0129: .toArray(new WebLink[_linkList.size()]);
0130: }
0131: return _links;
0132: }
0133:
0134: /**
0135: * Returns a proxy for each applet found embedded in this page.
0136: *
0137: * @return Applets
0138: */
0139: public WebApplet[] getApplets() {
0140: if (_applets == null) {
0141: loadElements();
0142: _applets = (WebApplet[]) _appletList
0143: .toArray(new WebApplet[_appletList.size()]);
0144: }
0145: return _applets;
0146: }
0147:
0148: /**
0149: * Returns the images found in the page in the order in which they appear.
0150: *
0151: * @return Images
0152: */
0153: public WebImage[] getImages() {
0154: if (_images == null) {
0155: loadElements();
0156: _images = (WebImage[]) _imagesList
0157: .toArray(new WebImage[_imagesList.size()]);
0158: }
0159: return _images;
0160: }
0161:
0162: /**
0163: * Returns the top-level block elements found in the page in the order in
0164: * which they appear.
0165: *
0166: * @return Text blocks
0167: */
0168: public TextBlock[] getTextBlocks() {
0169: if (_blocks == null) {
0170: loadElements();
0171: _blocks = (TextBlock[]) _blocksList
0172: .toArray(new TextBlock[_blocksList.size()]);
0173: }
0174: return _blocks;
0175: }
0176:
0177: /**
0178: * Returns the first text block found in the page which matches the
0179: * specified predicate and value.
0180: *
0181: * @param predicate
0182: * @param criteria
0183: * @return Text block
0184: */
0185: public TextBlock getFirstMatchingTextBlock(
0186: HTMLElementPredicate predicate, Object criteria) {
0187: TextBlock[] blocks = getTextBlocks();
0188: for (int i = 0; i < blocks.length; i++) {
0189: if (predicate.matchesCriteria(blocks[i], criteria)) {
0190: return blocks[i];
0191: }
0192: }
0193: return null;
0194: }
0195:
0196: /**
0197: * @param block
0198: * @return Text block
0199: */
0200: public TextBlock getNextTextBlock(TextBlock block) {
0201: int index = _blocksList.indexOf(block);
0202: if (index < 0 || index == _blocksList.size() - 1) {
0203: return null;
0204: }
0205: return (TextBlock) _blocksList.get(index + 1);
0206: }
0207:
0208: /**
0209: * Returns the top-level tables found in the page in the order in which they
0210: * appear.
0211: *
0212: * @return Tables
0213: */
0214: public WebTable[] getTables() {
0215: if (_tables == null) {
0216: loadElements();
0217: _tables = (WebTable[]) _tableList
0218: .toArray(new WebTable[_tableList.size()]);
0219: }
0220: return _tables;
0221: }
0222:
0223: /**
0224: * Returns the HTMLElement with the specified ID.
0225: *
0226: * @param id
0227: * @return Element
0228: */
0229: public HTMLElement getElementWithID(String id) {
0230: return (HTMLElement) getElementWithID(id, HTMLElement.class);
0231: }
0232:
0233: /**
0234: * Returns the HTML elements with the specified name.
0235: *
0236: * @param name
0237: * @return Element
0238: */
0239: public HTMLElement[] getElementsWithName(String name) {
0240: loadElements();
0241: ArrayList elements = (ArrayList) _elementsByName.get(name);
0242: return elements == null ? NO_ELEMENTS
0243: : (HTMLElement[]) elements
0244: .toArray(new HTMLElement[elements.size()]);
0245: }
0246:
0247: /**
0248: * Returns the HTML elements with an attribute with the specified name and
0249: * value.
0250: *
0251: * @param name
0252: * @param value
0253: * @return Elements
0254: */
0255: public HTMLElement[] getElementsWithAttribute(String name,
0256: String value) {
0257: loadElements();
0258: ArrayList elements = new ArrayList();
0259: for (Iterator i = _elements.values().iterator(); i.hasNext();) {
0260: HTMLElement element = (HTMLElement) i.next();
0261: if (value.equals(element.getAttribute(name))) {
0262: elements.add(element);
0263: }
0264: }
0265: return (HTMLElement[]) elements
0266: .toArray(new HTMLElement[elements.size()]);
0267: }
0268:
0269: /**
0270: * Returns a list of HTML element names contained in this HTML section.
0271: *
0272: * @return Names
0273: */
0274: public String[] getElementNames() {
0275: loadElements();
0276: return (String[]) _elementsByName.keySet().toArray(
0277: new String[_elementsByName.size()]);
0278: }
0279:
0280: HTMLElement[] getElementsByTagName(Node dom, String name) {
0281: loadElements();
0282: if (dom instanceof Element) {
0283: return getElementsFromList(((Element) dom)
0284: .getElementsByTagName(name));
0285: } else {
0286: return getElementsFromList(((Document) dom)
0287: .getElementsByTagName(name));
0288: }
0289: }
0290:
0291: private HTMLElement[] getElementsFromList(NodeList nl) {
0292: HTMLElement[] elements = new HTMLElement[nl.getLength()];
0293: for (int i = 0; i < elements.length; i++) {
0294: Node node = nl.item(i);
0295: elements[i] = (HTMLElement) _elements.get(node);
0296: if (elements[i] == null) {
0297: elements[i] = toDefaultElement((Element) node);
0298: _elements.put(node, elements[i]);
0299: }
0300: }
0301: return elements;
0302: }
0303:
0304: /**
0305: * Returns the form found in the page with the specified ID.
0306: *
0307: * @param id
0308: * @return Form
0309: */
0310: public WebForm getFormWithID(String id) {
0311: return (WebForm) getElementWithID(id, WebForm.class);
0312: }
0313:
0314: /**
0315: * Returns the link found in the page with the specified ID.
0316: *
0317: * @param id
0318: * @return Link
0319: */
0320: public WebLink getLinkWithID(String id) {
0321: return (WebLink) getElementWithID(id, WebLink.class);
0322:
0323: }
0324:
0325: private Object getElementWithID(String id, final Class klass) {
0326: loadElements();
0327: return whenCast(_elementsByID.get(id), klass);
0328: }
0329:
0330: private Object whenCast(Object o, Class klass) {
0331: return klass.isInstance(o) ? o : null;
0332: }
0333:
0334: /**
0335: * Returns the first link found in the page matching the specified criteria.
0336: *
0337: * @param predicate
0338: * @param criteria
0339: * @return Form
0340: */
0341: public WebForm getFirstMatchingForm(HTMLElementPredicate predicate,
0342: Object criteria) {
0343: WebForm[] forms = getForms();
0344: for (int i = 0; i < forms.length; i++) {
0345: if (predicate.matchesCriteria(forms[i], criteria)) {
0346: return forms[i];
0347: }
0348: }
0349: return null;
0350: }
0351:
0352: /**
0353: * Returns all links found in the page matching the specified criteria.
0354: *
0355: * @param predicate
0356: * @param criteria
0357: * @return Forms
0358: */
0359: public WebForm[] getMatchingForms(HTMLElementPredicate predicate,
0360: Object criteria) {
0361: ArrayList matches = new ArrayList();
0362: WebForm[] forms = getForms();
0363: for (int i = 0; i < forms.length; i++) {
0364: if (predicate.matchesCriteria(forms[i], criteria)) {
0365: matches.add(forms[i]);
0366: }
0367: }
0368: return (WebForm[]) matches.toArray(new WebForm[matches.size()]);
0369: }
0370:
0371: /**
0372: * Returns the form found in the page with the specified name.
0373: *
0374: * @param name
0375: * @return Form
0376: */
0377: public WebForm getFormWithName(String name) {
0378: return getFirstMatchingForm(WebForm.MATCH_NAME, name);
0379: }
0380:
0381: private void interpretScriptElement(Element element) {
0382: String script = getScript(element);
0383: if (script != null) {
0384: try {
0385: _updateElements = false;
0386: String language = NodeUtils.getNodeAttribute(element,
0387: "language", null);
0388: if (!getResponse().getScriptableObject()
0389: .supportsScript(language)) {
0390: _enableNoScriptNodes = true;
0391: }
0392: getResponse().getScriptableObject().runScript(language,
0393: script);
0394: } finally {
0395: setRootNode(_rootNode);
0396: }
0397: }
0398: }
0399:
0400: private String getScript(Node scriptNode) {
0401: String scriptLocation = NodeUtils.getNodeAttribute(scriptNode,
0402: "src", null);
0403: if (scriptLocation == null) {
0404: return NodeUtils.asText(scriptNode.getChildNodes());
0405: } else {
0406: try {
0407: return getIncludedScript(scriptLocation);
0408: } catch (IOException e) {
0409: throw new RuntimeException(
0410: "Error loading included script: " + e);
0411: }
0412: }
0413: }
0414:
0415: /**
0416: * Returns the contents of an included script, given its src attribute.
0417: *
0418: * @param srcAttribute
0419: * @return the contents of the script.
0420: * @throws java.io.IOException
0421: * if there is a problem retrieving the script
0422: */
0423: String getIncludedScript(String srcAttribute) throws IOException {
0424: WebRequest req = new GetMethodWebRequest(getBaseURL(),
0425: srcAttribute);
0426: WebWindow window = getResponse().getWindow();
0427: if (window == null) {
0428: throw new IllegalStateException(
0429: "Unable to retrieve script included by this response, since it was loaded by getResource(). Use getResponse() instead.");
0430: }
0431: return window.getResource(req).getText();
0432: }
0433:
0434: /**
0435: * If noscript node content is enabled, returns null - otherwise returns a
0436: * concealing element.
0437: */
0438: private HTMLElement toNoscriptElement(Element element) {
0439: return _enableNoScriptNodes ? null : new NoScriptElement(
0440: element);
0441: }
0442:
0443: static class HtmlElementRecorder {
0444:
0445: protected void recordHtmlElement(
0446: NodeUtils.PreOrderTraversal pot, Node node,
0447: HTMLElement htmlElement) {
0448: if (htmlElement != null) {
0449: addToMaps(pot, node, htmlElement);
0450: addToLists(pot, htmlElement);
0451: }
0452: }
0453:
0454: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0455: HTMLElement htmlElement) {
0456: for (Iterator i = pot.getContexts(); i.hasNext();) {
0457: Object o = i.next();
0458: if (o instanceof ParsedHTML) {
0459: ((ParsedHTML) o).addToList(htmlElement);
0460: }
0461: }
0462: }
0463:
0464: protected void addToMaps(NodeUtils.PreOrderTraversal pot,
0465: Node node, HTMLElement htmlElement) {
0466: for (Iterator i = pot.getContexts(); i.hasNext();) {
0467: Object o = i.next();
0468: if (o instanceof ParsedHTML) {
0469: ((ParsedHTML) o).addToMaps(node, htmlElement);
0470: }
0471: }
0472: }
0473:
0474: }
0475:
0476: abstract static class HTMLElementFactory extends
0477: HtmlElementRecorder {
0478: abstract HTMLElement toHTMLElement(
0479: NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML,
0480: Element element);
0481:
0482: void recordElement(NodeUtils.PreOrderTraversal pot,
0483: Element element, ParsedHTML parsedHTML) {
0484: HTMLElement htmlElement = toHTMLElement(pot, parsedHTML,
0485: element);
0486: recordHtmlElement(pot, element, htmlElement);
0487: }
0488:
0489: protected boolean isRecognized(ClientProperties properties) {
0490: return true;
0491: }
0492:
0493: protected boolean addToContext() {
0494: return false;
0495: }
0496:
0497: final protected ParsedHTML getParsedHTML(
0498: NodeUtils.PreOrderTraversal pot) {
0499: return (ParsedHTML) getClosestContext(pot, ParsedHTML.class);
0500: }
0501:
0502: final protected Object getClosestContext(
0503: NodeUtils.PreOrderTraversal pot, Class aClass) {
0504: return pot.getClosestContext(aClass);
0505: }
0506:
0507: protected ParsedHTML getRootContext(
0508: NodeUtils.PreOrderTraversal pot) {
0509: return (ParsedHTML) pot.getRootContext();
0510: }
0511: }
0512:
0513: static class DefaultElementFactory extends HTMLElementFactory {
0514:
0515: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0516: ParsedHTML parsedHTML, Element element) {
0517: if (element.getAttribute("id").equals("")) {
0518: return null;
0519: }
0520: return parsedHTML.toDefaultElement(element);
0521: }
0522:
0523: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0524: HTMLElement htmlElement) {
0525: }
0526: }
0527:
0528: private HTMLElement toDefaultElement(Element element) {
0529: return new HTMLElementBase(element) {
0530: protected ScriptableDelegate newScriptable() {
0531: return new HTMLElementScriptable(this );
0532: }
0533:
0534: protected ScriptableDelegate getParentDelegate() {
0535: return getResponse().getScriptableObject()
0536: .getDocument();
0537: }
0538: };
0539: }
0540:
0541: static class WebFormFactory extends HTMLElementFactory {
0542: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0543: ParsedHTML parsedHTML, Element element) {
0544: return parsedHTML.toWebForm(element);
0545: }
0546:
0547: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0548: HTMLElement htmlElement) {
0549: super .addToLists(pot, htmlElement);
0550: getRootContext(pot)._activeForm = (WebForm) htmlElement;
0551: }
0552: }
0553:
0554: static class WebLinkFactory extends HTMLElementFactory {
0555: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0556: ParsedHTML parsedHTML, Element element) {
0557: return parsedHTML.toLinkAnchor(element);
0558: }
0559: }
0560:
0561: static class TextBlockFactory extends HTMLElementFactory {
0562: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0563: ParsedHTML parsedHTML, Element element) {
0564: return parsedHTML.toTextBlock(element);
0565: }
0566:
0567: protected boolean addToContext() {
0568: return true;
0569: }
0570:
0571: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0572: HTMLElement htmlElement) {
0573: for (Iterator i = pot.getContexts(); i.hasNext();) {
0574: Object o = i.next();
0575: if (!(o instanceof ParsedHTML)) {
0576: continue;
0577: }
0578: ((ParsedHTML) o).addToList(htmlElement);
0579: break;
0580: }
0581: }
0582:
0583: }
0584:
0585: static class ScriptFactory extends HTMLElementFactory {
0586:
0587: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0588: ParsedHTML parsedHTML, Element element) {
0589: return null;
0590: }
0591:
0592: void recordElement(NodeUtils.PreOrderTraversal pot,
0593: Element element, ParsedHTML parsedHTML) {
0594: parsedHTML.interpretScriptElement(element);
0595: }
0596: }
0597:
0598: static class NoScriptFactory extends HTMLElementFactory {
0599:
0600: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0601: ParsedHTML parsedHTML, Element element) {
0602: return parsedHTML.toNoscriptElement(element);
0603: }
0604:
0605: protected boolean addToContext() {
0606: return true;
0607: }
0608: }
0609:
0610: static class WebFrameFactory extends HTMLElementFactory {
0611: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0612: ParsedHTML parsedHTML, Element element) {
0613: return parsedHTML.toWebFrame(element);
0614: }
0615: }
0616:
0617: static class WebIFrameFactory extends HTMLElementFactory {
0618: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0619: ParsedHTML parsedHTML, Element element) {
0620: return parsedHTML.toWebIFrame(element);
0621: }
0622:
0623: protected boolean isRecognized(ClientProperties properties) {
0624: return properties.isIframeSupported();
0625: }
0626:
0627: protected boolean addToContext() {
0628: return true;
0629: }
0630: }
0631:
0632: static class WebImageFactory extends HTMLElementFactory {
0633: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0634: ParsedHTML parsedHTML, Element element) {
0635: return parsedHTML.toWebImage(element);
0636: }
0637: }
0638:
0639: static class WebAppletFactory extends HTMLElementFactory {
0640: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0641: ParsedHTML parsedHTML, Element element) {
0642: return parsedHTML.toWebApplet(element);
0643: }
0644:
0645: protected boolean addToContext() {
0646: return true;
0647: }
0648: }
0649:
0650: static class WebTableFactory extends HTMLElementFactory {
0651: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0652: ParsedHTML parsedHTML, Element element) {
0653: return parsedHTML.toWebTable(element);
0654: }
0655:
0656: protected boolean addToContext() {
0657: return true;
0658: }
0659:
0660: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0661: HTMLElement htmlElement) {
0662: getParsedHTML(pot).addToList(htmlElement);
0663: }
0664: }
0665:
0666: static class TableRowFactory extends HTMLElementFactory {
0667: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0668: ParsedHTML parsedHTML, Element element) {
0669: WebTable wt = getWebTable(pot);
0670: if (wt == null) {
0671: return null;
0672: }
0673: return wt.newTableRow(element);
0674: }
0675:
0676: private WebTable getWebTable(NodeUtils.PreOrderTraversal pot) {
0677: return (WebTable) getClosestContext(pot, WebTable.class);
0678: }
0679:
0680: protected boolean addToContext() {
0681: return true;
0682: }
0683:
0684: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0685: HTMLElement htmlElement) {
0686: getWebTable(pot).addRow((WebTable.TableRow) htmlElement);
0687: }
0688: }
0689:
0690: static class TableCellFactory extends HTMLElementFactory {
0691: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0692: ParsedHTML parsedHTML, Element element) {
0693: WebTable.TableRow tr = getTableRow(pot);
0694: if (tr == null) {
0695: return null;
0696: }
0697: return tr.newTableCell(element);
0698: }
0699:
0700: private WebTable.TableRow getTableRow(
0701: NodeUtils.PreOrderTraversal pot) {
0702: return (WebTable.TableRow) getClosestContext(pot,
0703: WebTable.TableRow.class);
0704: }
0705:
0706: protected boolean addToContext() {
0707: return true;
0708: }
0709:
0710: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0711: HTMLElement htmlElement) {
0712: getTableRow(pot).addTableCell((TableCell) htmlElement);
0713: }
0714: }
0715:
0716: static class FormControlFactory extends HTMLElementFactory {
0717:
0718: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0719: ParsedHTML parsedHTML, Element element) {
0720: final WebForm form = getForm(pot);
0721: return form == null ? newControlWithoutForm(parsedHTML,
0722: element) : form.newFormControl(element);
0723: }
0724:
0725: private HTMLElement newControlWithoutForm(
0726: ParsedHTML parsedHTML, Element element) {
0727: if (element.getNodeName().equalsIgnoreCase("button")
0728: && isValidNonFormButtonType(NodeUtils
0729: .getNodeAttribute(element, "type"))) {
0730: return parsedHTML.toButtonWithoutForm(element);
0731: } else {
0732: return null;
0733: }
0734: }
0735:
0736: private boolean isValidNonFormButtonType(String buttonType) {
0737: return buttonType.equals("")
0738: || buttonType.equalsIgnoreCase("button");
0739: }
0740:
0741: private WebForm getForm(NodeUtils.PreOrderTraversal pot) {
0742: return getRootContext(pot)._activeForm;
0743: }
0744:
0745: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0746: HTMLElement htmlElement) {
0747: WebForm form = getForm(pot);
0748: if (form != null) {
0749: form.addFormControl((FormControl) htmlElement);
0750: }
0751: }
0752: }
0753:
0754: static class WebListFactory extends HTMLElementFactory {
0755: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0756: ParsedHTML parsedHTML, Element element) {
0757: return parsedHTML.toOrderedList(element);
0758: }
0759:
0760: protected boolean addToContext() {
0761: return true;
0762: }
0763:
0764: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0765: HTMLElement htmlElement) {
0766: TextBlock textBlock = getTextBlock(pot);
0767: if (textBlock != null) {
0768: textBlock.addList((WebList) htmlElement);
0769: }
0770: }
0771:
0772: private TextBlock getTextBlock(NodeUtils.PreOrderTraversal pot) {
0773: return (TextBlock) getClosestContext(pot, TextBlock.class);
0774: }
0775: }
0776:
0777: static class ListItemFactory extends HTMLElementFactory {
0778: HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot,
0779: ParsedHTML parsedHTML, Element element) {
0780: WebList webList = getWebList(pot);
0781: if (webList == null) {
0782: return null;
0783: }
0784: return webList.addNewItem(element);
0785: }
0786:
0787: private WebList getWebList(NodeUtils.PreOrderTraversal pot) {
0788: return (WebList) getClosestContext(pot, WebList.class);
0789: }
0790:
0791: protected boolean addToContext() {
0792: return true;
0793: }
0794:
0795: protected void addToLists(NodeUtils.PreOrderTraversal pot,
0796: HTMLElement htmlElement) {
0797: }
0798: }
0799:
0800: private static HashMap _htmlFactoryClasses = new HashMap();
0801: private static HTMLElementFactory _defaultFactory = new DefaultElementFactory();
0802:
0803: static {
0804: _htmlFactoryClasses.put("a", new WebLinkFactory());
0805: _htmlFactoryClasses.put("area", new WebLinkFactory());
0806: _htmlFactoryClasses.put("form", new WebFormFactory());
0807: _htmlFactoryClasses.put("img", new WebImageFactory());
0808: _htmlFactoryClasses.put("applet", new WebAppletFactory());
0809: _htmlFactoryClasses.put("table", new WebTableFactory());
0810: _htmlFactoryClasses.put("tr", new TableRowFactory());
0811: _htmlFactoryClasses.put("td", new TableCellFactory());
0812: _htmlFactoryClasses.put("th", new TableCellFactory());
0813: _htmlFactoryClasses.put("frame", new WebFrameFactory());
0814: _htmlFactoryClasses.put("iframe", new WebIFrameFactory());
0815: _htmlFactoryClasses.put("script", new ScriptFactory());
0816: _htmlFactoryClasses.put("noscript", new NoScriptFactory());
0817: _htmlFactoryClasses.put("ol", new WebListFactory());
0818: _htmlFactoryClasses.put("ul", new WebListFactory());
0819: _htmlFactoryClasses.put("li", new ListItemFactory());
0820:
0821: for (int i = 0; i < TEXT_ELEMENTS.length; i++) {
0822: _htmlFactoryClasses.put(TEXT_ELEMENTS[i],
0823: new TextBlockFactory());
0824: }
0825:
0826: for (Iterator i = Arrays.asList(
0827: FormControl.getControlElementTags()).iterator(); i
0828: .hasNext();) {
0829: _htmlFactoryClasses.put(i.next(), new FormControlFactory());
0830: }
0831: }
0832:
0833: private static HTMLElementFactory getHTMLElementFactory(
0834: String tagName) {
0835: final HTMLElementFactory factory = (HTMLElementFactory) _htmlFactoryClasses
0836: .get(tagName);
0837: return factory != null ? factory : _defaultFactory;
0838: }
0839:
0840: private void loadElements() {
0841: if (!_updateElements) {
0842: return;
0843: }
0844:
0845: NodeUtils.NodeAction action = new NodeUtils.NodeAction() {
0846: public boolean processElement(
0847: NodeUtils.PreOrderTraversal pot, Element element) {
0848: HTMLElementFactory factory = getHTMLElementFactory(element
0849: .getNodeName().toLowerCase());
0850: if (factory == null
0851: || !factory.isRecognized(getClientProperties())) {
0852: return true;
0853: }
0854: if (pot.getClosestContext(ContentConcealer.class) != null) {
0855: return true;
0856: }
0857:
0858: if (!_elements.containsKey(element)) {
0859: factory
0860: .recordElement(pot, element,
0861: ParsedHTML.this );
0862: }
0863: if (factory.addToContext()) {
0864: pot.pushContext(_elements.get(element));
0865: }
0866:
0867: return true;
0868: }
0869:
0870: public void processTextNode(
0871: NodeUtils.PreOrderTraversal pot, Node textNode) {
0872: if (textNode.getNodeValue().trim().length() == 0) {
0873: return;
0874: }
0875:
0876: Node parent = textNode.getParentNode();
0877: if (!parent.getNodeName().equalsIgnoreCase("body")) {
0878: return;
0879: }
0880: if (pot.getClosestContext(ContentConcealer.class) != null) {
0881: return;
0882: }
0883: new HtmlElementRecorder().recordHtmlElement(pot,
0884: textNode, newTextBlock(textNode));
0885: }
0886: };
0887: NodeUtils.PreOrderTraversal nt = new NodeUtils.PreOrderTraversal(
0888: getRootNode());
0889: nt.pushBaseContext(this );
0890: nt.perform(action);
0891:
0892: _updateElements = false;
0893: }
0894:
0895: private ClientProperties getClientProperties() {
0896: WebWindow window = _response.getWindow();
0897: return window == null ? ClientProperties.getDefaultProperties()
0898: : window.getClient().getClientProperties();
0899: }
0900:
0901: private Button toButtonWithoutForm(Element element) {
0902: return new Button(_response, element);
0903: }
0904:
0905: private WebForm toWebForm(Element element) {
0906: return new WebForm(_response, _baseURL, element, _frame,
0907: _baseTarget, _characterSet);
0908: }
0909:
0910: private WebFrame toWebFrame(Element element) {
0911: return new WebFrame(_response, _baseURL, element, _frame);
0912: }
0913:
0914: private WebFrame toWebIFrame(Element element) {
0915: return new WebIFrame(_baseURL, element, _frame);
0916: }
0917:
0918: private WebLink toLinkAnchor(Element child) {
0919: return (!isWebLink(child)) ? null : new WebLink(_response,
0920: _baseURL, child, _frame, _baseTarget, _characterSet);
0921: }
0922:
0923: private boolean isWebLink(Node node) {
0924: return (node.getAttributes().getNamedItem("href") != null);
0925: }
0926:
0927: private WebImage toWebImage(Element child) {
0928: return new WebImage(_response, this , _baseURL, child, _frame,
0929: _baseTarget, _characterSet);
0930: }
0931:
0932: private WebApplet toWebApplet(Element element) {
0933: return new WebApplet(_response, element, _baseTarget);
0934: }
0935:
0936: private WebTable toWebTable(Element element) {
0937: return new WebTable(_response, _frame, element, _baseURL,
0938: _baseTarget, _characterSet);
0939: }
0940:
0941: private TextBlock toTextBlock(Element element) {
0942: return new TextBlock(_response, _frame, _baseURL, _baseTarget,
0943: element, _characterSet);
0944: }
0945:
0946: private TextBlock newTextBlock(Node textNode) {
0947: return new TextBlock(_response, _frame, _baseURL, _baseTarget,
0948: textNode, _characterSet);
0949: }
0950:
0951: private WebList toOrderedList(Element element) {
0952: return new WebList(_response, _frame, _baseURL, _baseTarget,
0953: element, _characterSet);
0954: }
0955:
0956: private void addToMaps(Node node, HTMLElement htmlElement) {
0957: _elements.put(node, htmlElement);
0958: if (htmlElement.getID() != null) {
0959: _elementsByID.put(htmlElement.getID(), htmlElement);
0960: }
0961: if (htmlElement.getName() != null) {
0962: addNamedElement(htmlElement.getName(), htmlElement);
0963: }
0964: }
0965:
0966: private void addNamedElement(String name, HTMLElement htmlElement) {
0967: List list = (List) _elementsByName.get(name);
0968: if (list == null) {
0969: _elementsByName.put(name, list = new ArrayList());
0970: }
0971: list.add(htmlElement);
0972: }
0973:
0974: private void addToList(HTMLElement htmlElement) {
0975: ArrayList list = getListForElement(htmlElement);
0976: if (list != null) {
0977: list.add(htmlElement);
0978: }
0979: }
0980:
0981: private ArrayList getListForElement(HTMLElement element) {
0982: if (element instanceof WebLink) {
0983: return _linkList;
0984: }
0985: if (element instanceof WebForm) {
0986: return _formsList;
0987: }
0988: if (element instanceof WebImage) {
0989: return _imagesList;
0990: }
0991: if (element instanceof WebApplet) {
0992: return _appletList;
0993: }
0994: if (element instanceof WebTable) {
0995: return _tableList;
0996: }
0997: if (element instanceof WebFrame) {
0998: return _frameList;
0999: }
1000: if (element instanceof BlockElement) {
1001: return _blocksList;
1002: }
1003: return null;
1004: }
1005:
1006: /**
1007: * Returns the first link which contains the specified text.
1008: *
1009: * @param text
1010: * @return Link
1011: */
1012: public WebLink getLinkWith(String text) {
1013: return getFirstMatchingLink(WebLink.MATCH_CONTAINED_TEXT, text);
1014: }
1015:
1016: /**
1017: * Returns the link which contains the first image with the specified text
1018: * as its 'alt' attribute.
1019: *
1020: * @param text
1021: * @return Link
1022: */
1023: public WebLink getLinkWithImageText(String text) {
1024: WebImage image = getImageWithAltText(text);
1025: return image == null ? null : image.getLink();
1026: }
1027:
1028: /**
1029: * Returns the link found in the page with the specified name.
1030: *
1031: * @param name
1032: * @return Link
1033: */
1034: public WebLink getLinkWithName(String name) {
1035: return getFirstMatchingLink(WebLink.MATCH_NAME, name);
1036: }
1037:
1038: /**
1039: * Returns the first link found in the page matching the specified criteria.
1040: *
1041: * @param predicate
1042: * @param criteria
1043: * @return Link
1044: */
1045: public WebLink getFirstMatchingLink(HTMLElementPredicate predicate,
1046: Object criteria) {
1047: WebLink[] links = getLinks();
1048: for (int i = 0; i < links.length; i++) {
1049: if (predicate.matchesCriteria(links[i], criteria)) {
1050: return links[i];
1051: }
1052: }
1053: return null;
1054: }
1055:
1056: /**
1057: * Returns all links found in the page matching the specified criteria.
1058: *
1059: * @param predicate
1060: * @param criteria
1061: * @return Links
1062: */
1063: public WebLink[] getMatchingLinks(HTMLElementPredicate predicate,
1064: Object criteria) {
1065: ArrayList matches = new ArrayList();
1066: WebLink[] links = getLinks();
1067: for (int i = 0; i < links.length; i++) {
1068: if (predicate.matchesCriteria(links[i], criteria)) {
1069: matches.add(links[i]);
1070: }
1071: }
1072: return (WebLink[]) matches.toArray(new WebLink[matches.size()]);
1073: }
1074:
1075: /**
1076: * Returns the image found in the page with the specified name.
1077: *
1078: * @param name
1079: * @return Image
1080: */
1081: public WebImage getImageWithName(String name) {
1082: WebImage[] images = getImages();
1083: for (int i = 0; i < images.length; i++) {
1084: if (HttpUnitUtils.matches(name, images[i].getName())) {
1085: return images[i];
1086: }
1087: }
1088: return null;
1089: }
1090:
1091: /**
1092: * Returns the first image found in the page with the specified src
1093: * attribute.
1094: *
1095: * @param source
1096: * @return Image
1097: */
1098: public WebImage getImageWithSource(String source) {
1099: WebImage[] images = getImages();
1100: for (int i = 0; i < images.length; i++) {
1101: if (HttpUnitUtils.matches(source, images[i].getSource())) {
1102: return images[i];
1103: }
1104: }
1105: return null;
1106: }
1107:
1108: /**
1109: * Returns the first image found in the page with the specified alt
1110: * attribute.
1111: *
1112: * @param altText
1113: * @return Image
1114: */
1115: public WebImage getImageWithAltText(String altText) {
1116: WebImage[] images = getImages();
1117: for (int i = 0; i < images.length; i++) {
1118: if (HttpUnitUtils.matches(altText, images[i].getAltText())) {
1119: return images[i];
1120: }
1121: }
1122: return null;
1123: }
1124:
1125: /**
1126: * Returns the first table in the response which matches the specified
1127: * predicate and value. Will recurse into any nested tables, as needed.
1128: *
1129: * @param predicate
1130: * @param criteria
1131: * @return the selected table, or null if none is found
1132: */
1133: public WebTable getFirstMatchingTable(
1134: HTMLElementPredicate predicate, Object criteria) {
1135: return getTableSatisfyingPredicate(getTables(), predicate,
1136: criteria);
1137: }
1138:
1139: /**
1140: * Returns the tables in the response which match the specified predicate
1141: * and value. Will recurse into any nested tables, as needed.
1142: *
1143: * @param predicate
1144: * @param criteria
1145: * @return the selected tables, or null if none are found
1146: */
1147: public WebTable[] getMatchingTables(HTMLElementPredicate predicate,
1148: Object criteria) {
1149: return getTablesSatisfyingPredicate(getTables(), predicate,
1150: criteria);
1151: }
1152:
1153: /**
1154: * Returns the first table in the response which has the specified text as
1155: * the full text of its first non-blank row and non-blank column. Will
1156: * recurse into any nested tables, as needed.
1157: *
1158: * @param text
1159: * @return the selected table, or null if none is found
1160: */
1161: public WebTable getTableStartingWith(String text) {
1162: return getFirstMatchingTable(
1163: WebTable.MATCH_FIRST_NONBLANK_CELL, text);
1164: }
1165:
1166: /**
1167: * Returns the first table in the response which has the specified text as a
1168: * prefix of the text in its first non-blank row and non-blank column. Will
1169: * recurse into any nested tables, as needed.
1170: *
1171: * @param text
1172: * @return the selected table, or null if none is found
1173: */
1174: public WebTable getTableStartingWithPrefix(String text) {
1175: return getFirstMatchingTable(
1176: WebTable.MATCH_FIRST_NONBLANK_CELL_PREFIX, text);
1177: }
1178:
1179: /**
1180: * Returns the first table in the response which has the specified text as
1181: * its summary attribute. Will recurse into any nested tables, as needed.
1182: *
1183: * @param summary
1184: * @return the selected table, or null if none is found
1185: */
1186: public WebTable getTableWithSummary(String summary) {
1187: return getFirstMatchingTable(WebTable.MATCH_SUMMARY, summary);
1188: }
1189:
1190: /**
1191: * Returns the first table in the response which has the specified text as
1192: * its ID attribute. Will recurse into any nested tables, as needed.
1193: *
1194: * @param ID
1195: * @return the selected table, or null if none is found
1196: */
1197: public WebTable getTableWithID(String ID) {
1198: return getFirstMatchingTable(WebTable.MATCH_ID, ID);
1199: }
1200:
1201: /**
1202: * Returns a copy of the domain object model associated with this page.
1203: *
1204: * @return Node
1205: */
1206: public Node getDOM() {
1207: // JDo: see README
1208: // Better would be some read-only attribute
1209: // return getRootNode().cloneNode( /* deep */ true );
1210: return getRootNode();
1211: }
1212:
1213: // ---------------------------------- Object methods
1214: // --------------------------------
1215:
1216: /**
1217: * @see java.lang.Object#toString()
1218: */
1219: public String toString() {
1220: return _baseURL.toExternalForm()
1221: + System.getProperty("line.separator") + _rootNode;
1222: }
1223:
1224: // ---------------------------------- package members
1225: // --------------------------------
1226:
1227: /**
1228: * Specifies the root node for this HTML fragment.
1229: */
1230: void setRootNode(Node rootNode) {
1231: if (_rootNode != null && rootNode != _rootNode) {
1232: throw new IllegalStateException(
1233: "The root node has already been defined as "
1234: + _rootNode
1235: + " and cannot be redefined as " + rootNode);
1236: }
1237: _rootNode = rootNode;
1238: _links = null;
1239: _forms = null;
1240: _images = null;
1241: _applets = null;
1242: _tables = null;
1243: _frames = null;
1244: _blocks = null;
1245: _updateElements = true;
1246: }
1247:
1248: /**
1249: * Returns the base URL for this HTML segment.
1250: */
1251: URL getBaseURL() {
1252: return _baseURL;
1253: }
1254:
1255: WebResponse getResponse() {
1256: return _response;
1257: }
1258:
1259: /**
1260: * Returns the domain object model associated with this page, to be used
1261: * internally.
1262: */
1263: Node getOriginalDOM() {
1264: return getRootNode();
1265: }
1266:
1267: /**
1268: * Returns the frames found in the page in the order in which they appear.
1269: *
1270: * @return Frames
1271: */
1272: public WebFrame[] getFrames() {
1273: if (_frames == null) {
1274: loadElements();
1275: _frames = (WebFrame[]) _frameList
1276: .toArray(new WebFrame[_frameList.size()]);
1277: }
1278: return _frames;
1279: }
1280:
1281: // ---------------------------------- private members
1282: // --------------------------------
1283:
1284: Node getRootNode() {
1285: if (_rootNode == null) {
1286: throw new IllegalStateException(
1287: "The root node has not been specified");
1288: }
1289: return _rootNode;
1290: }
1291:
1292: /**
1293: * Returns the table with the specified text in its summary attribute.
1294: */
1295: private WebTable getTableSatisfyingPredicate(WebTable[] tables,
1296: HTMLElementPredicate predicate, Object value) {
1297: for (int i = 0; i < tables.length; i++) {
1298: if (predicate.matchesCriteria(tables[i], value)) {
1299: return tables[i];
1300: } else {
1301: for (int j = 0; j < tables[i].getRowCount(); j++) {
1302: for (int k = 0; k < tables[i].getColumnCount(); k++) {
1303: TableCell cell = tables[i].getTableCell(j, k);
1304: if (cell != null) {
1305: WebTable[] innerTables = cell.getTables();
1306: if (innerTables.length != 0) {
1307: WebTable result = getTableSatisfyingPredicate(
1308: innerTables, predicate, value);
1309: if (result != null) {
1310: return result;
1311: }
1312: }
1313: }
1314: }
1315: }
1316: }
1317: }
1318: return null;
1319: }
1320:
1321: /**
1322: * Returns the tables which match the specified criteria.
1323: */
1324: private WebTable[] getTablesSatisfyingPredicate(WebTable[] tables,
1325: HTMLElementPredicate predicate, Object value) {
1326: ArrayList matches = new ArrayList();
1327: for (int i = 0; i < tables.length; i++) {
1328: if (predicate.matchesCriteria(tables[i], value)) {
1329: matches.add(tables[i]);
1330: }
1331: for (int j = 0; j < tables[i].getRowCount(); j++) {
1332: for (int k = 0; k < tables[i].getColumnCount(); k++) {
1333: TableCell cell = tables[i].getTableCell(j, k);
1334: if (cell != null) {
1335: WebTable[] innerTables = cell.getTables();
1336: if (innerTables.length != 0) {
1337: WebTable[] result = getTablesSatisfyingPredicate(
1338: innerTables, predicate, value);
1339: if (result != null && result.length > 0) {
1340: for (int l = 0; l < result.length; l++) {
1341: matches.add(result[l]);
1342: }
1343: }
1344: }
1345: }
1346: }
1347: }
1348: }
1349: if (matches.size() > 0) {
1350: return (WebTable[]) matches.toArray(new WebTable[matches
1351: .size()]);
1352: } else {
1353: return null;
1354: }
1355: }
1356:
1357: class WebIFrame extends WebFrame implements ContentConcealer {
1358:
1359: /**
1360: * Constructor
1361: *
1362: * @param baseURL
1363: * @param frameNode
1364: * @param parentFrame
1365: */
1366: public WebIFrame(URL baseURL, Node frameNode,
1367: FrameSelector parentFrame) {
1368: super (_response, baseURL, frameNode, parentFrame);
1369: }
1370: }
1371:
1372: class NoScriptElement extends HTMLElementBase implements
1373: ContentConcealer {
1374:
1375: /**
1376: * Constructor
1377: *
1378: * @param node
1379: */
1380: public NoScriptElement(Node node) {
1381: super (node);
1382: }
1383:
1384: protected ScriptableDelegate newScriptable() {
1385: return null;
1386: }
1387:
1388: protected ScriptableDelegate getParentDelegate() {
1389: return null;
1390: }
1391: }
1392:
1393: }
|