001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.html.dom;
019:
020: import org.w3c.dom.Element;
021: import org.w3c.dom.Node;
022: import org.w3c.dom.html.HTMLAnchorElement;
023: import org.w3c.dom.html.HTMLAppletElement;
024: import org.w3c.dom.html.HTMLAreaElement;
025: import org.w3c.dom.html.HTMLCollection;
026: import org.w3c.dom.html.HTMLElement;
027: import org.w3c.dom.html.HTMLFormElement;
028: import org.w3c.dom.html.HTMLImageElement;
029: import org.w3c.dom.html.HTMLObjectElement;
030: import org.w3c.dom.html.HTMLOptionElement;
031: import org.w3c.dom.html.HTMLTableCellElement;
032: import org.w3c.dom.html.HTMLTableRowElement;
033: import org.w3c.dom.html.HTMLTableSectionElement;
034:
035: /**
036: * Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named
037: * elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to
038: * look for is identified in the constructor by code. This collection is not
039: * optimized for traversing large trees.
040: * <p>
041: * The collection has to meet two requirements: it has to be live, and it has
042: * to traverse depth first and always return results in that order. As such,
043: * using an object container (such as {@link java.util.Vector}) is expensive on
044: * insert/remove operations. Instead, the collection has been implemented using
045: * three traversing functions. As a result, operations on large documents will
046: * result in traversal of the entire document tree and consume a considerable
047: * amount of time.
048: * <p>
049: * Note that synchronization on the traversed document cannot be achieved.
050: * The document itself cannot be locked, and locking each traversed node is
051: * likely to lead to a dead lock condition. Therefore, there is a chance of the
052: * document being changed as results are fetched; in all likelihood, the results
053: * might be out dated, but not erroneous.
054: *
055: * @xerces.internal
056: *
057: * @version $Revision: 447255 $ $Date: 2006-09-18 01:36:42 -0400 (Mon, 18 Sep 2006) $
058: * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a>
059: * @see org.w3c.dom.html.HTMLCollection
060: */
061: class HTMLCollectionImpl implements HTMLCollection {
062:
063: /**
064: * Request collection of all anchors in document: <A> elements that
065: * have a <code>name</code> attribute.
066: */
067: static final short ANCHOR = 1;
068:
069: /**
070: * Request collection of all forms in document: <FORM> elements.
071: */
072: static final short FORM = 2;
073:
074: /**
075: * Request collection of all images in document: <IMAGE> elements.
076: */
077: static final short IMAGE = 3;
078:
079: /**
080: * Request collection of all Applets in document: <APPLET> and
081: * <OBJECT> elements (<OBJECT> must contain an Applet).
082: */
083: static final short APPLET = 4;
084:
085: /**
086: * Request collection of all links in document: <A> and <AREA>
087: * elements (must have a <code>href</code> attribute).
088: */
089: static final short LINK = 5;
090:
091: /**
092: * Request collection of all options in selection: <OPTION> elments in
093: * <SELECT> or <OPTGROUP>.
094: */
095: static final short OPTION = 6;
096:
097: /**
098: * Request collection of all rows in table: <TR> elements in table or
099: * table section.
100: */
101: static final short ROW = 7;
102:
103: /**
104: * Request collection of all form elements: <INPUT>, <BUTTON>,
105: * <SELECT>, <TEXT> and <TEXTAREA> elements inside form
106: * <FORM>.
107: */
108: static final short ELEMENT = 8;
109:
110: /**
111: * Request collection of all areas in map: <AREA> element in <MAP>
112: * (non recursive).
113: */
114: static final short AREA = -1;
115:
116: /**
117: * Request collection of all table bodies in table: <TBODY> element in
118: * table <TABLE> (non recursive).
119: */
120: static final short TBODY = -2;
121:
122: /**
123: * Request collection of all cells in row: <TD> elements in <TR>
124: * (non recursive).
125: */
126: static final short CELL = -3;
127:
128: /**
129: * Indicates what this collection is looking for. Holds one of the enumerated
130: * values and used by {@link #collectionMatch}. Set by the constructor and
131: * determine the collection's use for its life time.
132: */
133: private short _lookingFor;
134:
135: /**
136: * This is the top level element underneath which the collection exists.
137: */
138: private Element _topLevel;
139:
140: /**
141: * Construct a new collection that retrieves element of the specific type
142: * (<code>lookingFor</code>) from the specific document portion
143: * (<code>topLevel</code>).
144: *
145: * @param topLevel The element underneath which the collection exists
146: * @param lookingFor Code indicating what elements to look for
147: */
148: HTMLCollectionImpl(HTMLElement topLevel, short lookingFor) {
149: if (topLevel == null)
150: throw new NullPointerException(
151: "HTM011 Argument 'topLevel' is null.");
152: _topLevel = topLevel;
153: _lookingFor = lookingFor;
154: }
155:
156: /**
157: * Returns the length of the collection. This method might traverse the
158: * entire document tree.
159: *
160: * @return Length of the collection
161: */
162: public final int getLength() {
163: // Call recursive function on top-level element.
164: return getLength(_topLevel);
165: }
166:
167: /**
168: * Retrieves the indexed node from the collection. Nodes are numbered in
169: * tree order - depth-first traversal order. This method might traverse
170: * the entire document tree.
171: *
172: * @param index The index of the node to return
173: * @return The specified node or null if no such node found
174: */
175: public final Node item(int index) {
176: if (index < 0)
177: throw new IllegalArgumentException(
178: "HTM012 Argument 'index' is negative.");
179: // Call recursive function on top-level element.
180: return item(_topLevel, new CollectionIndex(index));
181: }
182:
183: /**
184: * Retrieves the named node from the collection. The name is matched case
185: * sensitive against the <TT>id</TT> attribute of each element in the
186: * collection, returning the first match. The tree is traversed in
187: * depth-first order. This method might traverse the entire document tree.
188: *
189: * @param name The name of the node to return
190: * @return The specified node or null if no such node found
191: */
192: public final Node namedItem(String name) {
193: if (name == null)
194: throw new NullPointerException(
195: "HTM013 Argument 'name' is null.");
196: // Call recursive function on top-level element.
197: return namedItem(_topLevel, name);
198: }
199:
200: /**
201: * Recursive function returns the number of elements of a particular type
202: * that exist under the top level element. This is a recursive function
203: * and the top level element is passed along.
204: *
205: * @param topLevel Top level element from which to scan
206: * @return Number of elements
207: */
208: private int getLength(Element topLevel) {
209: int length;
210: Node node;
211:
212: synchronized (topLevel) {
213: // Always count from zero and traverse all the childs of the
214: // current element in the order they appear.
215: length = 0;
216: node = topLevel.getFirstChild();
217: while (node != null) {
218: // If a particular node is an element (could be HTML or XML),
219: // do two things: if it's the one we're looking for, count
220: // another matched element; at any rate, traverse it's
221: // children as well.
222: if (node instanceof Element) {
223: if (collectionMatch((Element) node, null))
224: ++length;
225: else if (recurse())
226: length += getLength((Element) node);
227: }
228: node = node.getNextSibling();
229: }
230: }
231: return length;
232: }
233:
234: /**
235: * Recursive function returns the numbered element of a particular type
236: * that exist under the top level element. This is a recursive function
237: * and the top level element is passed along.
238: * <p>
239: * Note that this function must call itself with an index and get back both
240: * the element (if one was found) and the new index which is decremeneted
241: * for any like element found. Since integers are only passed by value,
242: * this function makes use of a separate class ({@link CollectionIndex})
243: * to hold that index.
244: *
245: * @param topLevel Top level element from which to scan
246: * @param index The index of the item to retreive
247: * @return Number of elements
248: * @see CollectionIndex
249: */
250: private Node item(Element topLevel, CollectionIndex index) {
251: Node node;
252: Node result;
253:
254: synchronized (topLevel) {
255: // Traverse all the childs of the current element in the order
256: // they appear. Count from the index backwards until you reach
257: // matching element with an index of zero. Return that element.
258: node = topLevel.getFirstChild();
259: while (node != null) {
260: // If a particular node is an element (could be HTML or XML),
261: // do two things: if it's the one we're looking for, decrease
262: // the index and if zero, return this node; at any rate,
263: // traverse it's children as well.
264: if (node instanceof Element) {
265: if (collectionMatch((Element) node, null)) {
266: if (index.isZero())
267: return node;
268: index.decrement();
269: } else if (recurse()) {
270: result = item((Element) node, index);
271: if (result != null)
272: return result;
273: }
274: }
275: node = node.getNextSibling();
276: }
277: }
278: return null;
279: }
280:
281: /**
282: * Recursive function returns an element of a particular type with the
283: * specified name (<TT>id</TT> attribute).
284: *
285: * @param topLevel Top level element from which to scan
286: * @param name The named element to look for
287: * @return The first named element found
288: */
289: private Node namedItem(Element topLevel, String name) {
290: Node node;
291: Node result;
292:
293: synchronized (topLevel) {
294: // Traverse all the childs of the current element in the order
295: // they appear.
296: node = topLevel.getFirstChild();
297: while (node != null) {
298: // If a particular node is an element (could be HTML or XML),
299: // do two things: if it's the one we're looking for, and the
300: // name (id attribute) attribute is the one we're looking for,
301: // return this element; otherwise, traverse it's children.
302: if (node instanceof Element) {
303: if (collectionMatch((Element) node, name))
304: return node;
305: else if (recurse()) {
306: result = namedItem((Element) node, name);
307: if (result != null)
308: return result;
309: }
310: }
311: node = node.getNextSibling();
312: }
313: return node;
314: }
315: }
316:
317: /**
318: * Returns true if scanning methods should iterate through the collection.
319: * When looking for elements in the document, recursing is needed to traverse
320: * the full document tree. When looking inside a specific element (e.g. for a
321: * cell inside a row), recursing can lead to erroneous results.
322: *
323: * @return True if methods should recurse to traverse entire tree
324: */
325: protected boolean recurse() {
326: return _lookingFor > 0;
327: }
328:
329: /**
330: * Determines if current element matches based on what we're looking for.
331: * The element is passed along with an optional identifier name. If the
332: * element is the one we're looking for, return true. If the name is also
333: * specified, the name must match the <code>id</code> attribute
334: * (match <code>name</code> first for anchors).
335: *
336: * @param elem The current element
337: * @param name The identifier name or null
338: * @return The element matches what we're looking for
339: */
340: protected boolean collectionMatch(Element elem, String name) {
341: boolean match;
342:
343: synchronized (elem) {
344: // Begin with no matching. Depending on what we're looking for,
345: // attempt to match based on the element type. This is the quickest
346: // way to match involving only a cast. Do the expensive string
347: // comparison later on.
348: match = false;
349: switch (_lookingFor) {
350: case ANCHOR:
351: // Anchor is an <A> element with a 'name' attribute. Otherwise, it's
352: // just a link.
353: match = (elem instanceof HTMLAnchorElement)
354: && elem.getAttribute("name").length() > 0;
355: break;
356: case FORM:
357: // Any <FORM> element.
358: match = (elem instanceof HTMLFormElement);
359: break;
360: case IMAGE:
361: // Any <IMG> element. <OBJECT> elements with images are not returned.
362: match = (elem instanceof HTMLImageElement);
363: break;
364: case APPLET:
365: // Any <APPLET> element, and any <OBJECT> element which represents an
366: // Applet. This is determined by 'codetype' attribute being
367: // 'application/java' or 'classid' attribute starting with 'java:'.
368: match = (elem instanceof HTMLAppletElement)
369: || (elem instanceof HTMLObjectElement && ("application/java"
370: .equals(elem.getAttribute("codetype")) || elem
371: .getAttribute("classid").startsWith(
372: "java:")));
373: break;
374: case ELEMENT:
375: // All form elements implement HTMLFormControl for easy identification.
376: match = (elem instanceof HTMLFormControl);
377: break;
378: case LINK:
379: // Any <A> element, and any <AREA> elements with an 'href' attribute.
380: match = ((elem instanceof HTMLAnchorElement || elem instanceof HTMLAreaElement) && elem
381: .getAttribute("href").length() > 0);
382: break;
383: case AREA:
384: // Any <AREA> element.
385: match = (elem instanceof HTMLAreaElement);
386: break;
387: case OPTION:
388: // Any <OPTION> element.
389: match = (elem instanceof HTMLOptionElement);
390: break;
391: case ROW:
392: // Any <TR> element.
393: match = (elem instanceof HTMLTableRowElement);
394: break;
395: case TBODY:
396: // Any <TBODY> element (one of three table section types).
397: match = (elem instanceof HTMLTableSectionElement && elem
398: .getTagName().equals("TBODY"));
399: break;
400: case CELL:
401: // Any <TD> element.
402: match = (elem instanceof HTMLTableCellElement);
403: break;
404: }
405:
406: // If element type was matched and a name was specified, must also match
407: // the name against either the 'id' or the 'name' attribute. The 'name'
408: // attribute is relevant only for <A> elements for backward compatibility.
409: if (match && name != null) {
410: // If an anchor and 'name' attribute matches, return true. Otherwise,
411: // try 'id' attribute.
412: if (elem instanceof HTMLAnchorElement
413: && name.equals(elem.getAttribute("name")))
414: return true;
415: match = name.equals(elem.getAttribute("id"));
416: }
417: }
418: return match;
419: }
420:
421: }
422:
423: /**
424: * {@link CollectionImpl#item} must traverse down the tree and decrement the
425: * index until it matches an element who's index is zero. Since integers are
426: * passed by value, this class servers to pass the index into each recursion
427: * by reference. It encompasses all the operations that need be performed on
428: * the index, although direct access is possible.
429: *
430: * @xerces.internal
431: *
432: * @see CollectionImpl#item
433: */
434: class CollectionIndex {
435:
436: /**
437: * Returns the current index.
438: *
439: * @return Current index
440: */
441: int getIndex() {
442: return _index;
443: }
444:
445: /**
446: * Decrements the index by one.
447: */
448: void decrement() {
449: --_index;
450: }
451:
452: /**
453: * Returns true if index is zero (or negative).
454: *
455: * @return True if index is zero
456: */
457: boolean isZero() {
458: return _index <= 0;
459: }
460:
461: /**
462: * Constructs a new index with the specified initial value. The index will
463: * then be decremeneted until it reaches zero.
464: *
465: * @param index The initial value
466: */
467: CollectionIndex(int index) {
468: _index = index;
469: }
470:
471: /**
472: * Holds the actual value that is passed by reference using this class.
473: */
474: private int _index;
475:
476: }
|