001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057: package org.apache.html.dom;
058:
059: import java.io.*;
060: import java.util.*;
061: import java.lang.reflect.*;
062: import org.w3c.dom.*;
063: import org.w3c.dom.html.*;
064: import org.apache.xerces.dom.DocumentImpl;
065: import org.apache.xerces.dom.NodeImpl;
066: import org.apache.xerces.dom.AttrImpl;
067: import org.w3c.dom.DOMException;
068:
069: /**
070: * Implements an HTML document. Provides access to the top level element in the
071: * document, its body and title.
072: * <P>
073: * Several methods create new nodes of all basic types (comment, text, element,
074: * etc.). These methods create new nodes but do not place them in the document
075: * tree. The nodes may be placed in the document tree using {@link
076: * org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or
077: * they may be placed in some other document tree.
078: * <P>
079: * Note: <FRAMESET> documents are not supported at the moment, neither
080: * are direct document writing ({@link #open}, {@link #write}) and HTTP attribute
081: * methods ({@link #getURL}, {@link #getCookie}).
082: *
083: *
084: * @version $Revision: 1.8 $ $Date: 2000/10/04 08:53:21 $
085: * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a>
086: * @see org.w3c.dom.html.HTMLDocument
087: */
088: public class HTMLDocumentImpl extends DocumentImpl implements
089: HTMLDocument {
090:
091: /**
092: * Holds {@link HTMLCollectionImpl} object with live collection of all
093: * anchors in document. This reference is on demand only once.
094: */
095: private HTMLCollectionImpl _anchors;
096:
097: /**
098: * Holds {@link HTMLCollectionImpl} object with live collection of all
099: * forms in document. This reference is on demand only once.
100: */
101: private HTMLCollectionImpl _forms;
102:
103: /**
104: * Holds {@link HTMLCollectionImpl} object with live collection of all
105: * images in document. This reference is on demand only once.
106: */
107: private HTMLCollectionImpl _images;
108:
109: /**
110: * Holds {@link HTMLCollectionImpl} object with live collection of all
111: * links in document. This reference is on demand only once.
112: */
113: private HTMLCollectionImpl _links;
114:
115: /**
116: * Holds {@link HTMLCollectionImpl} object with live collection of all
117: * applets in document. This reference is on demand only once.
118: */
119: private HTMLCollectionImpl _applets;
120:
121: /**
122: * Holds string writer used by direct manipulation operation ({@link #open}.
123: * {@link #write}, etc) to write new contents into the document and parse
124: * that text into a document tree.
125: */
126: private StringWriter _writer;
127:
128: /**
129: * Holds names and classes of HTML element types. When an element with a
130: * particular tag name is created, the matching {@link java.lang.Class}
131: * is used to create the element object. For example, <A> matches
132: * {@link HTMLAnchorElementImpl}. This static table is shared across all
133: * HTML documents.
134: *
135: * @see #createElement
136: */
137: private static Hashtable _elementTypesHTML;
138:
139: /**
140: * Signature used to locate constructor of HTML element classes. This
141: * static array is shared across all HTML documents.
142: *
143: * @see #createElement
144: */
145: private static final Class[] _elemClassSigHTML = new Class[] {
146: HTMLDocumentImpl.class, String.class };
147:
148: /**
149: */
150: public HTMLDocumentImpl() {
151: super ();
152: populateElementTypes();
153: }
154:
155: public synchronized Element getDocumentElement() {
156: Node html;
157: Node child;
158: Node next;
159:
160: // The document element is the top-level HTML element of the HTML
161: // document. Only this element should exist at the top level.
162: // If the HTML element is found, all other elements that might
163: // precede it are placed inside the HTML element.
164: html = getFirstChild();
165: while (html != null) {
166: if (html instanceof HTMLHtmlElement) {
167: synchronized (html) {
168: child = getFirstChild();
169: while (child != null && child != html) {
170: next = child.getNextSibling();
171: html.appendChild(child);
172: child = next;
173: }
174: }
175: return (HTMLElement) html;
176: }
177: html = html.getNextSibling();
178: }
179:
180: // HTML element must exist. Create a new element and dump the
181: // entire contents of the document into it in the same order as
182: // they appear now.
183: html = new HTMLHtmlElementImpl(this , "HTML");
184: child = getFirstChild();
185: while (child != null) {
186: next = child.getNextSibling();
187: html.appendChild(child);
188: child = next;
189: }
190: appendChild(html);
191: return (HTMLElement) html;
192: }
193:
194: /**
195: * Obtains the <HEAD> element in the document, creating one if does
196: * not exist before. The <HEAD> element is the first element in the
197: * <HTML> in the document. The <HTML> element is obtained by
198: * calling {@link #getDocumentElement}. If the element does not exist, one
199: * is created.
200: * <P>
201: * Called by {@link #getTitle}, {@link #setTitle}, {@link #getBody} and
202: * {@link #setBody} to assure the document has the <HEAD> element
203: * correctly placed.
204: *
205: * @return The <HEAD> element
206: */
207: public synchronized HTMLElement getHead() {
208: Node head;
209: Node html;
210: Node child;
211: Node next;
212:
213: // Call getDocumentElement() to get the HTML element that is also the
214: // top-level element in the document. Get the first element in the
215: // document that is called HEAD. Work with that.
216: html = getDocumentElement();
217: synchronized (html) {
218: head = html.getFirstChild();
219: while (head != null && !(head instanceof HTMLHeadElement))
220: head = head.getNextSibling();
221: // HEAD exists but might not be first element in HTML: make sure
222: // it is and return it.
223: if (head != null) {
224: synchronized (head) {
225: child = html.getFirstChild();
226: while (child != null && child != head) {
227: next = child.getNextSibling();
228: head.insertBefore(child, head.getFirstChild());
229: child = next;
230: }
231: }
232: return (HTMLElement) head;
233: }
234:
235: // Head does not exist, create a new one, place it at the top of the
236: // HTML element and return it.
237: head = new HTMLHeadElementImpl(this , "HEAD");
238: html.insertBefore(head, html.getFirstChild());
239: }
240: return (HTMLElement) head;
241: }
242:
243: public synchronized String getTitle() {
244: HTMLElement head;
245: NodeList list;
246: Node title;
247:
248: // Get the HEAD element and look for the TITLE element within.
249: // When found, make sure the TITLE is a direct child of HEAD,
250: // and return the title's text (the Text node contained within).
251: head = getHead();
252: title = head.getElementsByTagName("TITLE").item(0);
253: list = head.getElementsByTagName("TITLE");
254: if (list.getLength() > 0) {
255: title = list.item(0);
256: return ((HTMLTitleElement) title).getText();
257: }
258: // No TITLE found, return an empty string.
259: return "";
260: }
261:
262: public synchronized void setTitle(String newTitle) {
263: HTMLElement head;
264: NodeList list;
265: Node title;
266:
267: // Get the HEAD element and look for the TITLE element within.
268: // When found, make sure the TITLE is a direct child of HEAD,
269: // and set the title's text (the Text node contained within).
270: head = getHead();
271: list = head.getElementsByTagName("TITLE");
272: if (list.getLength() > 0) {
273: title = list.item(0);
274: if (title.getParentNode() != head)
275: head.appendChild(title);
276: ((HTMLTitleElement) title).setText(newTitle);
277: } else {
278: // No TITLE found, create a new element and place it at the end
279: // of the HEAD element.
280: title = new HTMLTitleElementImpl(this , "TITLE");
281: ((HTMLTitleElement) title).setText(newTitle);
282: head.appendChild(title);
283: }
284: }
285:
286: public synchronized HTMLElement getBody() {
287: Node html;
288: Node head;
289: Node body;
290: Node child;
291: Node next;
292:
293: // Call getDocumentElement() to get the HTML element that is also the
294: // top-level element in the document. Get the first element in the
295: // document that is called BODY. Work with that.
296: html = getDocumentElement();
297: head = getHead();
298: synchronized (html) {
299: body = head.getNextSibling();
300: while (body != null && !(body instanceof HTMLBodyElement)
301: && !(body instanceof HTMLFrameSetElement))
302: body = body.getNextSibling();
303:
304: // BODY/FRAMESET exists but might not be second element in HTML
305: // (after HEAD): make sure it is and return it.
306: if (body != null) {
307: synchronized (body) {
308: child = head.getNextSibling();
309: while (child != null && child != body) {
310: next = child.getNextSibling();
311: body.insertBefore(child, body.getFirstChild());
312: child = next;
313: }
314: }
315: return (HTMLElement) body;
316: }
317:
318: // BODY does not exist, create a new one, place it in the HTML element
319: // right after the HEAD and return it.
320: body = new HTMLBodyElementImpl(this , "BODY");
321: html.appendChild(body);
322: }
323: return (HTMLElement) body;
324: }
325:
326: public synchronized void setBody(HTMLElement newBody) {
327: Node html;
328: Node body;
329: Node head;
330: Node child;
331: NodeList list;
332:
333: synchronized (newBody) {
334: // Call getDocumentElement() to get the HTML element that is also the
335: // top-level element in the document. Get the first element in the
336: // document that is called BODY. Work with that.
337: html = getDocumentElement();
338: head = getHead();
339: synchronized (html) {
340: list = this .getElementsByTagName("BODY");
341: if (list.getLength() > 0) {
342: // BODY exists but might not follow HEAD in HTML. If not,
343: // make it so and replce it. Start with the HEAD and make
344: // sure the BODY is the first element after the HEAD.
345: body = list.item(0);
346: synchronized (body) {
347: child = head;
348: while (child != null) {
349: if (child instanceof Element) {
350: if (child != body)
351: html.insertBefore(newBody, child);
352: else
353: html.replaceChild(newBody, body);
354: return;
355: }
356: child = child.getNextSibling();
357: }
358: html.appendChild(newBody);
359: }
360: return;
361: }
362: // BODY does not exist, place it in the HTML element
363: // right after the HEAD.
364: html.appendChild(newBody);
365: }
366: }
367: }
368:
369: public synchronized Element getElementById(String elementId) {
370: return getElementById(elementId, this );
371: }
372:
373: public NodeList getElementsByName(String elementName) {
374: return new NameNodeListImpl(this , elementName);
375: }
376:
377: public final NodeList getElementsByTagName(String tagName) {
378: return super .getElementsByTagName(tagName.toUpperCase());
379: }
380:
381: public final NodeList getElementsByTagNameNS(String namespaceURI,
382: String localName) {
383: if (namespaceURI != null && namespaceURI.length() > 0)
384: return super .getElementsByTagNameNS(namespaceURI, localName
385: .toUpperCase());
386: else
387: return super .getElementsByTagName(localName.toUpperCase());
388: }
389:
390: public Element createElementNS(String namespaceURI,
391: String qualifiedName) {
392: if (namespaceURI == null || namespaceURI.length() == 0)
393: return createElement(qualifiedName);
394: else
395: return super .createElementNS(namespaceURI, qualifiedName);
396: }
397:
398: public Element createElement(String tagName) throws DOMException {
399: Class elemClass;
400: Constructor cnst;
401:
402: // First, make sure tag name is all upper case, next get the associated
403: // element class. If no class is found, generate a generic HTML element.
404: // Do so also if an unexpected exception occurs.
405: tagName = tagName.toUpperCase();
406: elemClass = (Class) _elementTypesHTML.get(tagName);
407: if (elemClass != null) {
408: // Get the constructor for the element. The signature specifies an
409: // owner document and a tag name. Use the constructor to instantiate
410: // a new object and return it.
411: try {
412: cnst = elemClass.getConstructor(_elemClassSigHTML);
413: return (Element) cnst.newInstance(new Object[] { this ,
414: tagName });
415: } catch (Exception except) {
416: Throwable thrw;
417:
418: if (except instanceof java.lang.reflect.InvocationTargetException)
419: thrw = ((java.lang.reflect.InvocationTargetException) except)
420: .getTargetException();
421: else
422: thrw = except;
423: // System.out.println( "Exception " + thrw.getClass().getName() );
424: // System.out.println( thrw.getMessage() );
425:
426: throw new IllegalStateException(
427: "HTM15 Tag '"
428: + tagName
429: + "' associated with an Element class that failed to construct.\n"
430: + tagName);
431: }
432: }
433: return new HTMLElementImpl(this , tagName);
434: }
435:
436: /**
437: * Creates an Attribute having this Document as its OwnerDoc.
438: * Overrides {@link DocumentImpl#createAttribute} and returns
439: * and attribute whose name is lower case.
440: *
441: * @param name The name of the attribute
442: * @return An attribute whose name is all lower case
443: * @throws DOMException(INVALID_NAME_ERR) if the attribute name
444: * is not acceptable
445: */
446: public Attr createAttribute(String name) throws DOMException {
447: return super .createAttribute(name.toLowerCase());
448: }
449:
450: public String getReferrer() {
451: // Information not available on server side.
452: return null;
453: }
454:
455: public String getDomain() {
456: // Information not available on server side.
457: return null;
458: }
459:
460: public String getURL() {
461: // Information not available on server side.
462: return null;
463: }
464:
465: public String getCookie() {
466: // Information not available on server side.
467: return null;
468: }
469:
470: public void setCookie(String cookie) {
471: // Information not available on server side.
472: }
473:
474: public HTMLCollection getImages() {
475: // For more information see HTMLCollection#collectionMatch
476: if (_images == null)
477: _images = new HTMLCollectionImpl(getBody(),
478: HTMLCollectionImpl.IMAGE);
479: return _images;
480: }
481:
482: public HTMLCollection getApplets() {
483: // For more information see HTMLCollection#collectionMatch
484: if (_applets == null)
485: _applets = new HTMLCollectionImpl(getBody(),
486: HTMLCollectionImpl.APPLET);
487: return _applets;
488: }
489:
490: public HTMLCollection getLinks() {
491: // For more information see HTMLCollection#collectionMatch
492: if (_links == null)
493: _links = new HTMLCollectionImpl(getBody(),
494: HTMLCollectionImpl.LINK);
495: return _links;
496: }
497:
498: public HTMLCollection getForms() {
499: // For more information see HTMLCollection#collectionMatch
500: if (_forms == null)
501: _forms = new HTMLCollectionImpl(getBody(),
502: HTMLCollectionImpl.FORM);
503: return _forms;
504: }
505:
506: public HTMLCollection getAnchors() {
507: // For more information see HTMLCollection#collectionMatch
508: if (_anchors == null)
509: _anchors = new HTMLCollectionImpl(getBody(),
510: HTMLCollectionImpl.ANCHOR);
511: return _anchors;
512: }
513:
514: public void open() {
515: // When called an in-memory is prepared. The document tree is still
516: // accessible the old way, until this writer is closed.
517: if (_writer == null)
518: _writer = new StringWriter();
519: }
520:
521: public void close() {
522: // ! NOT IMPLEMENTED, REQUIRES PARSER !
523: if (_writer != null) {
524: _writer = null;
525: }
526: }
527:
528: public void write(String text) {
529: // Write a string into the in-memory writer.
530: if (_writer != null)
531: _writer.write(text);
532: }
533:
534: public void writeln(String text) {
535: // Write a line into the in-memory writer.
536: if (_writer != null)
537: _writer.write(text + "\n");
538: }
539:
540: public Node cloneNode(boolean deep) {
541: HTMLDocumentImpl clone;
542: NodeImpl node;
543:
544: clone = new HTMLDocumentImpl();
545: if (deep) {
546: node = (NodeImpl) getFirstChild();
547: while (node != null) {
548: clone.appendChild(clone.importNode(node, true));
549: node = (NodeImpl) node.getNextSibling();
550: }
551: }
552: return clone;
553: }
554:
555: /**
556: * Recursive method retreives an element by its <code>id</code> attribute.
557: * Called by {@link #getElementById(String)}.
558: *
559: * @param elementId The <code>id</code> value to look for
560: * @return The node in which to look for
561: */
562: private Element getElementById(String elementId, Node node) {
563: Node child;
564: Element result;
565:
566: child = node.getFirstChild();
567: while (child != null) {
568: if (child instanceof Element) {
569: if (elementId.equals(((Element) child)
570: .getAttribute("id")))
571: return (Element) child;
572: result = getElementById(elementId, child);
573: if (result != null)
574: return result;
575: }
576: child = child.getNextSibling();
577: }
578: return null;
579: }
580:
581: /**
582: * Called by the constructor to populate the element types list (see {@link
583: * #_elementTypesHTML}). Will be called multiple times but populate the list
584: * only the first time. Replacement for static constructor.
585: */
586: private static void populateElementTypes() {
587: // This class looks like it is due to some strange
588: // (read: inconsistent) JVM bugs.
589: // Initially all this code was placed in the static constructor,
590: // but that caused some early JVMs (1.1) to go mad, and if a
591: // class could not be found (as happened during development),
592: // the JVM would die.
593: // Bertrand Delacretaz <bdelacretaz@worldcom.ch> pointed out
594: // several configurations where HTMLAnchorElementImpl.class
595: // failed, forcing me to revert back to Class.forName().
596:
597: if (_elementTypesHTML != null)
598: return;
599: _elementTypesHTML = new Hashtable(63);
600: populateElementType("A", "HTMLAnchorElementImpl");
601: populateElementType("APPLET", "HTMLAppletElementImpl");
602: populateElementType("AREA", "HTMLAreaElementImpl");
603: populateElementType("BASE", "HTMLBaseElementImpl");
604: populateElementType("BASEFONT", "HTMLBaseFontElementImpl");
605: populateElementType("BLOCKQUOTE", "HTMLQuoteElementImpl");
606: populateElementType("BODY", "HTMLBodyElementImpl");
607: populateElementType("BR", "HTMLBRElementImpl");
608: populateElementType("BUTTON", "HTMLButtonElementImpl");
609: populateElementType("DEL", "HTMLModElementImpl");
610: populateElementType("DIR", "HTMLDirectoryElementImpl");
611: populateElementType("DIV", "HTMLDivElementImpl");
612: populateElementType("DL", "HTMLDListElementImpl");
613: populateElementType("FIELDSET", "HTMLFieldSetElementImpl");
614: populateElementType("FONT", "HTMLFontElementImpl");
615: populateElementType("FORM", "HTMLFormElementImpl");
616: populateElementType("FRAME", "HTMLFrameElementImpl");
617: populateElementType("FRAMESET", "HTMLFrameSetElementImpl");
618: populateElementType("HEAD", "HTMLHeadElementImpl");
619: populateElementType("H1", "HTMLHeadingElementImpl");
620: populateElementType("H2", "HTMLHeadingElementImpl");
621: populateElementType("H3", "HTMLHeadingElementImpl");
622: populateElementType("H4", "HTMLHeadingElementImpl");
623: populateElementType("H5", "HTMLHeadingElementImpl");
624: populateElementType("H6", "HTMLHeadingElementImpl");
625: populateElementType("HR", "HTMLHRElementImpl");
626: populateElementType("HTML", "HTMLHtmlElementImpl");
627: populateElementType("IFRAME", "HTMLIFrameElementImpl");
628: populateElementType("IMG", "HTMLImageElementImpl");
629: populateElementType("INPUT", "HTMLInputElementImpl");
630: populateElementType("INS", "HTMLModElementImpl");
631: populateElementType("ISINDEX", "HTMLIsIndexElementImpl");
632: populateElementType("LABEL", "HTMLLabelElementImpl");
633: populateElementType("LEGEND", "HTMLLegendElementImpl");
634: populateElementType("LI", "HTMLLIElementImpl");
635: populateElementType("LINK", "HTMLLinkElementImpl");
636: populateElementType("MAP", "HTMLMapElementImpl");
637: populateElementType("MENU", "HTMLMenuElementImpl");
638: populateElementType("META", "HTMLMetaElementImpl");
639: populateElementType("OBJECT", "HTMLObjectElementImpl");
640: populateElementType("OL", "HTMLOListElementImpl");
641: populateElementType("OPTGROUP", "HTMLOptGroupElementImpl");
642: populateElementType("OPTION", "HTMLOptionElementImpl");
643: populateElementType("P", "HTMLParagraphElementImpl");
644: populateElementType("PARAM", "HTMLParamElementImpl");
645: populateElementType("PRE", "HTMLPreElementImpl");
646: populateElementType("Q", "HTMLQuoteElementImpl");
647: populateElementType("SCRIPT", "HTMLScriptElementImpl");
648: populateElementType("SELECT", "HTMLSelectElementImpl");
649: populateElementType("STYLE", "HTMLStyleElementImpl");
650: populateElementType("TABLE", "HTMLTableElementImpl");
651: populateElementType("CAPTION", "HTMLTableCaptionElementImpl");
652: populateElementType("TD", "HTMLTableCellElementImpl");
653: populateElementType("TH", "HTMLTableCellElementImpl");
654: populateElementType("COL", "HTMLTableColElementImpl");
655: populateElementType("COLGROUP", "HTMLTableColElementImpl");
656: populateElementType("TR", "HTMLTableRowElementImpl");
657: populateElementType("TBODY", "HTMLTableSectionElementImpl");
658: populateElementType("THEAD", "HTMLTableSectionElementImpl");
659: populateElementType("TFOOT", "HTMLTableSectionElementImpl");
660: populateElementType("TEXTAREA", "HTMLTextAreaElementImpl");
661: populateElementType("TITLE", "HTMLTitleElementImpl");
662: populateElementType("UL", "HTMLUListElementImpl");
663: }
664:
665: private static void populateElementType(String tagName,
666: String className) {
667: try {
668: _elementTypesHTML.put(tagName, Class
669: .forName("org.apache.html.dom." + className));
670: } catch (ClassNotFoundException except) {
671: new RuntimeException(
672: "HTM019 OpenXML Error: Could not find class "
673: + className + " implementing HTML element "
674: + tagName + "\n" + className + "\t"
675: + tagName);
676: }
677: }
678:
679: }
|