001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057: package org.apache.html.dom;
058:
059: import org.w3c.dom.*;
060: import org.w3c.dom.html.*;
061:
062: /**
063: * Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named
064: * elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to
065: * look for is identified in the constructor by code. This collection is not
066: * optimized for traversing large trees.
067: * <p>
068: * The collection has to meet two requirements: it has to be live, and it has
069: * to traverse depth first and always return results in that order. As such,
070: * using an object container (such as {@link java.util.Vector}) is expensive on
071: * insert/remove operations. Instead, the collection has been implemented using
072: * three traversing functions. As a result, operations on large documents will
073: * result in traversal of the entire document tree and consume a considerable
074: * amount of time.
075: * <p>
076: * Note that synchronization on the traversed document cannot be achieved.
077: * The document itself cannot be locked, and locking each traversed node is
078: * likely to lead to a dead lock condition. Therefore, there is a chance of the
079: * document being changed as results are fetched; in all likelihood, the results
080: * might be out dated, but not erroneous.
081: *
082: *
083: * @version $Revision: 1.6 $ $Date: 2001/01/30 23:57:52 $
084: * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a>
085: * @see org.w3c.dom.html.HTMLCollection
086: */
087: class HTMLCollectionImpl implements HTMLCollection {
088:
089: /**
090: * Request collection of all anchors in document: <A> elements that
091: * have a <code>name</code> attribute.
092: */
093: static final short ANCHOR = 1;
094:
095: /**
096: * Request collection of all forms in document: <FORM> elements.
097: */
098: static final short FORM = 2;
099:
100: /**
101: * Request collection of all images in document: <IMAGE> elements.
102: */
103: static final short IMAGE = 3;
104:
105: /**
106: * Request collection of all Applets in document: <APPLET> and
107: * <OBJECT> elements (<OBJECT> must contain an Applet).
108: */
109: static final short APPLET = 4;
110:
111: /**
112: * Request collection of all links in document: <A> and <AREA>
113: * elements (must have a <code>href</code> attribute).
114: */
115: static final short LINK = 5;
116:
117: /**
118: * Request collection of all options in selection: <OPTION> elments in
119: * <SELECT> or <OPTGROUP>.
120: */
121: static final short OPTION = 6;
122:
123: /**
124: * Request collection of all rows in table: <TR> elements in table or
125: * table section.
126: */
127: static final short ROW = 7;
128:
129: /**
130: * Request collection of all form elements: <INPUT>, <BUTTON>,
131: * <SELECT>, <TEXT> and <TEXTAREA> elements inside form
132: * <FORM>.
133: */
134: static final short ELEMENT = 8;
135:
136: /**
137: * Request collection of all areas in map: <AREA> element in <MAP>
138: * (non recursive).
139: */
140: static final short AREA = -1;
141:
142: /**
143: * Request collection of all table bodies in table: <TBODY> element in
144: * table <TABLE> (non recursive).
145: */
146: static final short TBODY = -2;
147:
148: /**
149: * Request collection of all cells in row: <TD> elements in <TR>
150: * (non recursive).
151: */
152: static final short CELL = -3;
153:
154: /**
155: * Indicates what this collection is looking for. Holds one of the enumerated
156: * values and used by {@link #collectionMatch}. Set by the constructor and
157: * determine the collection's use for its life time.
158: */
159: private short _lookingFor;
160:
161: /**
162: * This is the top level element underneath which the collection exists.
163: */
164: private Element _topLevel;
165:
166: /**
167: * Construct a new collection that retrieves element of the specific type
168: * (<code>lookingFor</code>) from the specific document portion
169: * (<code>topLevel</code>).
170: *
171: * @param topLevel The element underneath which the collection exists
172: * @param lookingFor Code indicating what elements to look for
173: */
174: HTMLCollectionImpl(HTMLElement topLevel, short lookingFor) {
175: if (topLevel == null)
176: throw new NullPointerException(
177: "HTM011 Argument 'topLevel' is null.");
178: _topLevel = topLevel;
179: _lookingFor = lookingFor;
180: }
181:
182: /**
183: * Returns the length of the collection. This method might traverse the
184: * entire document tree.
185: *
186: * @return Length of the collection
187: */
188: public final int getLength() {
189: // Call recursive function on top-level element.
190: return getLength(_topLevel);
191: }
192:
193: /**
194: * Retrieves the indexed node from the collection. Nodes are numbered in
195: * tree order - depth-first traversal order. This method might traverse
196: * the entire document tree.
197: *
198: * @param index The index of the node to return
199: * @return The specified node or null if no such node found
200: */
201: public final Node item(int index) {
202: if (index < 0)
203: throw new IllegalArgumentException(
204: "HTM012 Argument 'index' is negative.");
205: // Call recursive function on top-level element.
206: return item(_topLevel, new CollectionIndex(index));
207: }
208:
209: /**
210: * Retrieves the named node from the collection. The name is matched case
211: * sensitive against the <TT>id</TT> attribute of each element in the
212: * collection, returning the first match. The tree is traversed in
213: * depth-first order. This method might traverse the entire document tree.
214: *
215: * @param name The name of the node to return
216: * @return The specified node or null if no such node found
217: */
218: public final Node namedItem(String name) {
219: if (name == null)
220: throw new NullPointerException(
221: "HTM013 Argument 'name' is null.");
222: // Call recursive function on top-level element.
223: return namedItem(_topLevel, name);
224: }
225:
226: /**
227: * Recursive function returns the number of elements of a particular type
228: * that exist under the top level element. This is a recursive function
229: * and the top level element is passed along.
230: *
231: * @param topLevel Top level element from which to scan
232: * @return Number of elements
233: */
234: private int getLength(Element topLevel) {
235: int length;
236: Node node;
237:
238: synchronized (topLevel) {
239: // Always count from zero and traverse all the childs of the
240: // current element in the order they appear.
241: length = 0;
242: node = topLevel.getFirstChild();
243: while (node != null) {
244: // If a particular node is an element (could be HTML or XML),
245: // do two things: if it's the one we're looking for, count
246: // another matched element; at any rate, traverse it's
247: // children as well.
248: if (node instanceof Element) {
249: if (collectionMatch((Element) node, null))
250: ++length;
251: else if (recurse())
252: length += getLength((Element) node);
253: }
254: node = node.getNextSibling();
255: }
256: }
257: return length;
258: }
259:
260: /**
261: * Recursive function returns the numbered element of a particular type
262: * that exist under the top level element. This is a recursive function
263: * and the top level element is passed along.
264: * <p>
265: * Note that this function must call itself with an index and get back both
266: * the element (if one was found) and the new index which is decremeneted
267: * for any like element found. Since integers are only passed by value,
268: * this function makes use of a separate class ({@link CollectionIndex})
269: * to hold that index.
270: *
271: * @param topLevel Top level element from which to scan
272: * @param index The index of the item to retreive
273: * @return Number of elements
274: * @see CollectionIndex
275: */
276: private Node item(Element topLevel, CollectionIndex index) {
277: Node node;
278: Node result;
279:
280: synchronized (topLevel) {
281: // Traverse all the childs of the current element in the order
282: // they appear. Count from the index backwards until you reach
283: // matching element with an index of zero. Return that element.
284: node = topLevel.getFirstChild();
285: while (node != null) {
286: // If a particular node is an element (could be HTML or XML),
287: // do two things: if it's the one we're looking for, decrease
288: // the index and if zero, return this node; at any rate,
289: // traverse it's children as well.
290: if (node instanceof Element) {
291: if (collectionMatch((Element) node, null)) {
292: if (index.isZero())
293: return node;
294: index.decrement();
295: } else if (recurse()) {
296: result = item((Element) node, index);
297: if (result != null)
298: return result;
299: }
300: }
301: node = node.getNextSibling();
302: }
303: }
304: return null;
305: }
306:
307: /**
308: * Recursive function returns an element of a particular type with the
309: * specified name (<TT>id</TT> attribute).
310: *
311: * @param topLevel Top level element from which to scan
312: * @param name The named element to look for
313: * @return The first named element found
314: */
315: private Node namedItem(Element topLevel, String name) {
316: Node node;
317: Node result;
318:
319: synchronized (topLevel) {
320: // Traverse all the childs of the current element in the order
321: // they appear.
322: node = topLevel.getFirstChild();
323: while (node != null) {
324: // If a particular node is an element (could be HTML or XML),
325: // do two things: if it's the one we're looking for, and the
326: // name (id attribute) attribute is the one we're looking for,
327: // return this element; otherwise, traverse it's children.
328: if (node instanceof Element) {
329: if (collectionMatch((Element) node, name))
330: return node;
331: else if (recurse()) {
332: result = namedItem((Element) node, name);
333: if (result != null)
334: return result;
335: }
336: }
337: node = node.getNextSibling();
338: }
339: return node;
340: }
341: }
342:
343: /**
344: * Returns true if scanning methods should iterate through the collection.
345: * When looking for elements in the document, recursing is needed to traverse
346: * the full document tree. When looking inside a specific element (e.g. for a
347: * cell inside a row), recursing can lead to erroneous results.
348: *
349: * @return True if methods should recurse to traverse entire tree
350: */
351: protected boolean recurse() {
352: return _lookingFor > 0;
353: }
354:
355: /**
356: * Determines if current element matches based on what we're looking for.
357: * The element is passed along with an optional identifier name. If the
358: * element is the one we're looking for, return true. If the name is also
359: * specified, the name must match the <code>id</code> attribute
360: * (match <code>name</code> first for anchors).
361: *
362: * @param elem The current element
363: * @param name The identifier name or null
364: * @return The element matches what we're looking for
365: */
366: protected boolean collectionMatch(Element elem, String name) {
367: boolean match;
368:
369: synchronized (elem) {
370: // Begin with no matching. Depending on what we're looking for,
371: // attempt to match based on the element type. This is the quickest
372: // way to match involving only a cast. Do the expensive string
373: // comparison later on.
374: match = false;
375: switch (_lookingFor) {
376: case ANCHOR:
377: // Anchor is an <A> element with a 'name' attribute. Otherwise, it's
378: // just a link.
379: match = (elem instanceof HTMLAnchorElement)
380: && elem.getAttribute("name").length() > 0;
381: break;
382: case FORM:
383: // Any <FORM> element.
384: match = (elem instanceof HTMLFormElement);
385: break;
386: case IMAGE:
387: // Any <IMG> element. <OBJECT> elements with images are not returned.
388: match = (elem instanceof HTMLImageElement);
389: break;
390: case APPLET:
391: // Any <APPLET> element, and any <OBJECT> element which represents an
392: // Applet. This is determined by 'codetype' attribute being
393: // 'application/java' or 'classid' attribute starting with 'java:'.
394: match = (elem instanceof HTMLAppletElement)
395: || (elem instanceof HTMLObjectElement && ("application/java"
396: .equals(elem.getAttribute("codetype")) || elem
397: .getAttribute("classid").startsWith(
398: "java:")));
399: break;
400: case ELEMENT:
401: // All form elements implement HTMLFormControl for easy identification.
402: match = (elem instanceof HTMLFormControl);
403: break;
404: case LINK:
405: // Any <A> element, and any <AREA> elements with an 'href' attribute.
406: match = ((elem instanceof HTMLAnchorElement || elem instanceof HTMLAreaElement) && elem
407: .getAttribute("href").length() > 0);
408: break;
409: case AREA:
410: // Any <AREA> element.
411: match = (elem instanceof HTMLAreaElement);
412: break;
413: case OPTION:
414: // Any <OPTION> element.
415: match = (elem instanceof HTMLOptionElement);
416: break;
417: case ROW:
418: // Any <TR> element.
419: match = (elem instanceof HTMLTableRowElement);
420: break;
421: case TBODY:
422: // Any <TBODY> element (one of three table section types).
423: match = (elem instanceof HTMLTableSectionElement && elem
424: .getTagName().equals("tbody"));
425: break;
426: case CELL:
427: // Any <TD> element.
428: match = (elem instanceof HTMLTableCellElement);
429: break;
430: }
431:
432: // If element type was matched and a name was specified, must also match
433: // the name against either the 'id' or the 'name' attribute. The 'name'
434: // attribute is relevant only for <A> elements for backward compatibility.
435: if (match && name != null) {
436: // If an anchor and 'name' attribute matches, return true. Otherwise,
437: // try 'id' attribute.
438: if (elem instanceof HTMLAnchorElement
439: && name.equals(elem.getAttribute("name")))
440: return true;
441: match = name.equals(elem.getAttribute("id"));
442: }
443: }
444: return match;
445: }
446:
447: }
448:
449: /**
450: * {@link CollectionImpl#item} must traverse down the tree and decrement the
451: * index until it matches an element who's index is zero. Since integers are
452: * passed by value, this class servers to pass the index into each recursion
453: * by reference. It encompasses all the operations that need be performed on
454: * the index, although direct access is possible.
455: *
456: * @see CollectionImpl#item
457: */
458: class CollectionIndex {
459:
460: /**
461: * Returns the current index.
462: *
463: * @return Current index
464: */
465: int getIndex() {
466: return _index;
467: }
468:
469: /**
470: * Decrements the index by one.
471: */
472: void decrement() {
473: --_index;
474: }
475:
476: /**
477: * Returns true if index is zero (or negative).
478: *
479: * @return True if index is zero
480: */
481: boolean isZero() {
482: return _index <= 0;
483: }
484:
485: /**
486: * Constructs a new index with the specified initial value. The index will
487: * then be decremeneted until it reaches zero.
488: *
489: * @param index The initial value
490: */
491: CollectionIndex(int index) {
492: _index = index;
493: }
494:
495: /**
496: * Holds the actual value that is passed by reference using this class.
497: */
498: private int _index;
499:
500: }
|