001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.html.dom;
019:
020: import java.io.StringWriter;
021: import java.lang.reflect.Constructor;
022: import java.util.Hashtable;
023: import java.util.Locale;
024:
025: import org.apache.xerces.dom.DocumentImpl;
026: import org.w3c.dom.Attr;
027: import org.w3c.dom.DOMException;
028: import org.w3c.dom.Element;
029: import org.w3c.dom.Node;
030: import org.w3c.dom.NodeList;
031: import org.w3c.dom.UserDataHandler;
032: import org.w3c.dom.html.HTMLBodyElement;
033: import org.w3c.dom.html.HTMLCollection;
034: import org.w3c.dom.html.HTMLDocument;
035: import org.w3c.dom.html.HTMLElement;
036: import org.w3c.dom.html.HTMLFrameSetElement;
037: import org.w3c.dom.html.HTMLHeadElement;
038: import org.w3c.dom.html.HTMLHtmlElement;
039: import org.w3c.dom.html.HTMLTitleElement;
040:
041: /**
042: * Implements an HTML document. Provides access to the top level element in the
043: * document, its body and title.
044: * <P>
045: * Several methods create new nodes of all basic types (comment, text, element,
046: * etc.). These methods create new nodes but do not place them in the document
047: * tree. The nodes may be placed in the document tree using {@link
048: * org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or
049: * they may be placed in some other document tree.
050: * <P>
* Note: &lt;FRAMESET&gt; documents are not supported at the moment, and neither
* is direct document writing ({@link #open}, {@link #write}) nor the HTTP
* attribute methods ({@link #getURL}, {@link #getCookie}).
054: *
055: * @xerces.internal
056: *
057: * @version $Revision: 451609 $ $Date: 2006-09-30 12:20:57 -0400 (Sat, 30 Sep 2006) $
058: * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a>
059: * @see org.w3c.dom.html.HTMLDocument
060: */
061: public class HTMLDocumentImpl extends DocumentImpl implements
062: HTMLDocument {
063:
/** Version identifier used by Java serialization. */
private static final long serialVersionUID = 4285791750126227180L;

/**
 * Holds the <code>HTMLCollectionImpl</code> object for the anchors in the
 * document. It is created on the first call to {@link #getAnchors} and the
 * same object is returned from then on.
 */
private HTMLCollectionImpl _anchors;

/**
 * Holds the <code>HTMLCollectionImpl</code> object for the forms in the
 * document. It is created on the first call to {@link #getForms} and the
 * same object is returned from then on.
 */
private HTMLCollectionImpl _forms;

/**
 * Holds the <code>HTMLCollectionImpl</code> object for the images in the
 * document. It is created on the first call to {@link #getImages} and the
 * same object is returned from then on.
 */
private HTMLCollectionImpl _images;

/**
 * Holds the <code>HTMLCollectionImpl</code> object for the links in the
 * document. It is created on the first call to {@link #getLinks} and the
 * same object is returned from then on.
 */
private HTMLCollectionImpl _links;

/**
 * Holds the <code>HTMLCollectionImpl</code> object for the applets in the
 * document. It is created on the first call to {@link #getApplets} and the
 * same object is returned from then on.
 */
private HTMLCollectionImpl _applets;

/**
 * Holds the string writer used by the direct manipulation operations
 * ({@link #open}, {@link #write}, {@link #writeln}, {@link #close}).
 * It is <code>null</code> unless the document has been opened; text
 * written while it is set is buffered here. Parsing the buffered text
 * into a document tree is not implemented in this class.
 */
private StringWriter _writer;

/**
 * Holds names and classes of HTML element types. Keys are upper case tag
 * names, values are the {@link java.lang.Class} objects used to create the
 * element for that tag. For example, <code>A</code> is registered with
 * <code>HTMLAnchorElementImpl</code>. This static table is shared across
 * all HTML documents and is filled in by {@link #populateElementTypes}.
 *
 * @see #createElement
 */
private static Hashtable _elementTypesHTML;

/**
 * Signature used to locate the constructor of HTML element classes: an
 * owner document followed by a tag name. This static array is shared
 * across all HTML documents.
 *
 * @see #createElement
 */
private static final Class[] _elemClassSigHTML = new Class[] {
HTMLDocumentImpl.class, String.class };
122:
/**
 * Creates a new, empty HTML document and makes sure the shared table of
 * HTML element classes has been populated.
 */
public HTMLDocumentImpl() {
    // The no-argument superclass constructor is invoked implicitly.
    populateElementTypes();
}
129:
130: public synchronized Element getDocumentElement() {
131: Node html;
132: Node child;
133: Node next;
134:
135: // The document element is the top-level HTML element of the HTML
136: // document. Only this element should exist at the top level.
137: // If the HTML element is found, all other elements that might
138: // precede it are placed inside the HTML element.
139: html = getFirstChild();
140: while (html != null) {
141: if (html instanceof HTMLHtmlElement) {
142: // REVISIT: [Q] Why is this code even here? In fact, the
143: // original code is in error because it will
144: // try to move ALL nodes to be children of the
145: // HTML tag. This is not the intended behavior
146: // for comments and processing instructions
147: // outside the root element; it will throw a
148: // hierarchy request error exception for doctype
149: // nodes; *and* this code shouldn't even be
150: // needed because the parser should never build
151: // a document that contains more than a single
152: // root element, anyway! -Ac
153: /***
154: synchronized ( html )
155: {
156: child = getFirstChild();
157: while ( child != null && child != html )
158: {
159: next = child.getNextSibling();
160: html.appendChild( child );
161: child = next;
162: }
163: }
164: /***/
165: return (HTMLElement) html;
166: }
167: html = html.getNextSibling();
168: }
169:
170: // HTML element must exist. Create a new element and dump the
171: // entire contents of the document into it in the same order as
172: // they appear now.
173: html = new HTMLHtmlElementImpl(this , "HTML");
174: child = getFirstChild();
175: while (child != null) {
176: next = child.getNextSibling();
177: html.appendChild(child);
178: child = next;
179: }
180: appendChild(html);
181: return (HTMLElement) html;
182: }
183:
184: /**
185: * Obtains the <HEAD> element in the document, creating one if does
186: * not exist before. The <HEAD> element is the first element in the
187: * <HTML> in the document. The <HTML> element is obtained by
188: * calling {@link #getDocumentElement}. If the element does not exist, one
189: * is created.
190: * <P>
191: * Called by {@link #getTitle}, {@link #setTitle}, {@link #getBody} and
192: * {@link #setBody} to assure the document has the <HEAD> element
193: * correctly placed.
194: *
195: * @return The <HEAD> element
196: */
197: public synchronized HTMLElement getHead() {
198: Node head;
199: Node html;
200: Node child;
201: Node next;
202:
203: // Call getDocumentElement() to get the HTML element that is also the
204: // top-level element in the document. Get the first element in the
205: // document that is called HEAD. Work with that.
206: html = getDocumentElement();
207: synchronized (html) {
208: head = html.getFirstChild();
209: while (head != null && !(head instanceof HTMLHeadElement))
210: head = head.getNextSibling();
211: // HEAD exists but might not be first element in HTML: make sure
212: // it is and return it.
213: if (head != null) {
214: synchronized (head) {
215: child = html.getFirstChild();
216: while (child != null && child != head) {
217: next = child.getNextSibling();
218: head.insertBefore(child, head.getFirstChild());
219: child = next;
220: }
221: }
222: return (HTMLElement) head;
223: }
224:
225: // Head does not exist, create a new one, place it at the top of the
226: // HTML element and return it.
227: head = new HTMLHeadElementImpl(this , "HEAD");
228: html.insertBefore(head, html.getFirstChild());
229: }
230: return (HTMLElement) head;
231: }
232:
233: public synchronized String getTitle() {
234: HTMLElement head;
235: NodeList list;
236: Node title;
237:
238: // Get the HEAD element and look for the TITLE element within.
239: // When found, make sure the TITLE is a direct child of HEAD,
240: // and return the title's text (the Text node contained within).
241: head = getHead();
242: list = head.getElementsByTagName("TITLE");
243: if (list.getLength() > 0) {
244: title = list.item(0);
245: return ((HTMLTitleElement) title).getText();
246: }
247: // No TITLE found, return an empty string.
248: return "";
249: }
250:
251: public synchronized void setTitle(String newTitle) {
252: HTMLElement head;
253: NodeList list;
254: Node title;
255:
256: // Get the HEAD element and look for the TITLE element within.
257: // When found, make sure the TITLE is a direct child of HEAD,
258: // and set the title's text (the Text node contained within).
259: head = getHead();
260: list = head.getElementsByTagName("TITLE");
261: if (list.getLength() > 0) {
262: title = list.item(0);
263: if (title.getParentNode() != head)
264: head.appendChild(title);
265: ((HTMLTitleElement) title).setText(newTitle);
266: } else {
267: // No TITLE found, create a new element and place it at the end
268: // of the HEAD element.
269: title = new HTMLTitleElementImpl(this , "TITLE");
270: ((HTMLTitleElement) title).setText(newTitle);
271: head.appendChild(title);
272: }
273: }
274:
275: public synchronized HTMLElement getBody() {
276: Node html;
277: Node head;
278: Node body;
279: Node child;
280: Node next;
281:
282: // Call getDocumentElement() to get the HTML element that is also the
283: // top-level element in the document. Get the first element in the
284: // document that is called BODY. Work with that.
285: html = getDocumentElement();
286: head = getHead();
287: synchronized (html) {
288: body = head.getNextSibling();
289: while (body != null && !(body instanceof HTMLBodyElement)
290: && !(body instanceof HTMLFrameSetElement))
291: body = body.getNextSibling();
292:
293: // BODY/FRAMESET exists but might not be second element in HTML
294: // (after HEAD): make sure it is and return it.
295: if (body != null) {
296: synchronized (body) {
297: child = head.getNextSibling();
298: while (child != null && child != body) {
299: next = child.getNextSibling();
300: body.insertBefore(child, body.getFirstChild());
301: child = next;
302: }
303: }
304: return (HTMLElement) body;
305: }
306:
307: // BODY does not exist, create a new one, place it in the HTML element
308: // right after the HEAD and return it.
309: body = new HTMLBodyElementImpl(this , "BODY");
310: html.appendChild(body);
311: }
312: return (HTMLElement) body;
313: }
314:
315: public synchronized void setBody(HTMLElement newBody) {
316: Node html;
317: Node body;
318: Node head;
319: Node child;
320: NodeList list;
321:
322: synchronized (newBody) {
323: // Call getDocumentElement() to get the HTML element that is also the
324: // top-level element in the document. Get the first element in the
325: // document that is called BODY. Work with that.
326: html = getDocumentElement();
327: head = getHead();
328: synchronized (html) {
329: list = this .getElementsByTagName("BODY");
330: if (list.getLength() > 0) {
331: // BODY exists but might not follow HEAD in HTML. If not,
332: // make it so and replce it. Start with the HEAD and make
333: // sure the BODY is the first element after the HEAD.
334: body = list.item(0);
335: synchronized (body) {
336: child = head;
337: while (child != null) {
338: if (child instanceof Element) {
339: if (child != body)
340: html.insertBefore(newBody, child);
341: else
342: html.replaceChild(newBody, body);
343: return;
344: }
345: child = child.getNextSibling();
346: }
347: html.appendChild(newBody);
348: }
349: return;
350: }
351: // BODY does not exist, place it in the HTML element
352: // right after the HEAD.
353: html.appendChild(newBody);
354: }
355: }
356: }
357:
358: public synchronized Element getElementById(String elementId) {
359: Element idElement = super .getElementById(elementId);
360: if (idElement != null) {
361: return idElement;
362: }
363: return getElementById(elementId, this );
364: }
365:
366: public NodeList getElementsByName(String elementName) {
367: return new NameNodeListImpl(this , elementName);
368: }
369:
370: public final NodeList getElementsByTagName(String tagName) {
371: return super .getElementsByTagName(tagName
372: .toUpperCase(Locale.ENGLISH));
373: }
374:
375: public final NodeList getElementsByTagNameNS(String namespaceURI,
376: String localName) {
377: if (namespaceURI != null && namespaceURI.length() > 0) {
378: return super .getElementsByTagNameNS(namespaceURI, localName
379: .toUpperCase(Locale.ENGLISH));
380: }
381: return super .getElementsByTagName(localName
382: .toUpperCase(Locale.ENGLISH));
383: }
384:
385: /**
386: * Xerces-specific constructor. "localName" is passed in, so we don't need
387: * to create a new String for it.
388: *
389: * @param namespaceURI The namespace URI of the element to
390: * create.
391: * @param qualifiedName The qualified name of the element type to
392: * instantiate.
393: * @param localpart The local name of the element to instantiate.
394: * @return Element A new Element object with the following attributes:
395: * @throws DOMException INVALID_CHARACTER_ERR: Raised if the specified
396: * name contains an invalid character.
397: */
398: public Element createElementNS(String namespaceURI,
399: String qualifiedName, String localpart) throws DOMException {
400: return createElementNS(namespaceURI, qualifiedName);
401: }
402:
403: public Element createElementNS(String namespaceURI,
404: String qualifiedName) {
405: if (namespaceURI == null || namespaceURI.length() == 0) {
406: return createElement(qualifiedName);
407: }
408: return super .createElementNS(namespaceURI, qualifiedName);
409: }
410:
411: public Element createElement(String tagName) throws DOMException {
412: Class elemClass;
413: Constructor cnst;
414:
415: // First, make sure tag name is all upper case, next get the associated
416: // element class. If no class is found, generate a generic HTML element.
417: // Do so also if an unexpected exception occurs.
418: tagName = tagName.toUpperCase(Locale.ENGLISH);
419: elemClass = (Class) _elementTypesHTML.get(tagName);
420: if (elemClass != null) {
421: // Get the constructor for the element. The signature specifies an
422: // owner document and a tag name. Use the constructor to instantiate
423: // a new object and return it.
424: try {
425: cnst = elemClass.getConstructor(_elemClassSigHTML);
426: return (Element) cnst.newInstance(new Object[] { this ,
427: tagName });
428: } catch (Exception except) {
429: /*
430: Throwable thrw;
431:
432: if ( except instanceof java.lang.reflect.InvocationTargetException )
433: thrw = ( (java.lang.reflect.InvocationTargetException) except ).getTargetException();
434: else
435: thrw = except;
436: System.out.println( "Exception " + thrw.getClass().getName() );
437: System.out.println( thrw.getMessage() );
438: */
439: throw new IllegalStateException(
440: "HTM15 Tag '"
441: + tagName
442: + "' associated with an Element class that failed to construct.\n"
443: + tagName);
444: }
445: }
446: return new HTMLElementImpl(this , tagName);
447: }
448:
449: /**
450: * Creates an Attribute having this Document as its OwnerDoc.
451: * Overrides {@link DocumentImpl#createAttribute} and returns
452: * and attribute whose name is lower case.
453: *
454: * @param name The name of the attribute
455: * @return An attribute whose name is all lower case
456: * @throws DOMException(INVALID_NAME_ERR) if the attribute name
457: * is not acceptable
458: */
459: public Attr createAttribute(String name) throws DOMException {
460: return super .createAttribute(name.toLowerCase(Locale.ENGLISH));
461: }
462:
/**
 * Returns the referrer of the document. Not supported by this
 * implementation.
 *
 * @return Always <code>null</code>
 */
public String getReferrer() {
// Information not available on server side.
return null;
}
467:
/**
 * Returns the domain of the document. Not supported by this
 * implementation.
 *
 * @return Always <code>null</code>
 */
public String getDomain() {
// Information not available on server side.
return null;
}
472:
/**
 * Returns the URL of the document. Not supported by this implementation.
 *
 * @return Always <code>null</code>
 */
public String getURL() {
// Information not available on server side.
return null;
}
477:
/**
 * Returns the cookies of the document. Not supported by this
 * implementation.
 *
 * @return Always <code>null</code>
 */
public String getCookie() {
// Information not available on server side.
return null;
}
482:
/**
 * Sets a cookie on the document. Not supported by this implementation:
 * the call does nothing and the value is discarded.
 *
 * @param cookie The cookie value (ignored)
 */
public void setCookie(String cookie) {
// Information not available on server side.
}
486:
487: public HTMLCollection getImages() {
488: // For more information see HTMLCollection#collectionMatch
489: if (_images == null)
490: _images = new HTMLCollectionImpl(getBody(),
491: HTMLCollectionImpl.IMAGE);
492: return _images;
493: }
494:
495: public HTMLCollection getApplets() {
496: // For more information see HTMLCollection#collectionMatch
497: if (_applets == null)
498: _applets = new HTMLCollectionImpl(getBody(),
499: HTMLCollectionImpl.APPLET);
500: return _applets;
501: }
502:
503: public HTMLCollection getLinks() {
504: // For more information see HTMLCollection#collectionMatch
505: if (_links == null)
506: _links = new HTMLCollectionImpl(getBody(),
507: HTMLCollectionImpl.LINK);
508: return _links;
509: }
510:
511: public HTMLCollection getForms() {
512: // For more information see HTMLCollection#collectionMatch
513: if (_forms == null)
514: _forms = new HTMLCollectionImpl(getBody(),
515: HTMLCollectionImpl.FORM);
516: return _forms;
517: }
518:
519: public HTMLCollection getAnchors() {
520: // For more information see HTMLCollection#collectionMatch
521: if (_anchors == null)
522: _anchors = new HTMLCollectionImpl(getBody(),
523: HTMLCollectionImpl.ANCHOR);
524: return _anchors;
525: }
526:
527: public void open() {
528: // When called an in-memory is prepared. The document tree is still
529: // accessible the old way, until this writer is closed.
530: if (_writer == null)
531: _writer = new StringWriter();
532: }
533:
534: public void close() {
535: // ! NOT IMPLEMENTED, REQUIRES PARSER !
536: if (_writer != null) {
537: _writer = null;
538: }
539: }
540:
541: public void write(String text) {
542: // Write a string into the in-memory writer.
543: if (_writer != null)
544: _writer.write(text);
545: }
546:
547: public void writeln(String text) {
548: // Write a line into the in-memory writer.
549: if (_writer != null)
550: _writer.write(text + "\n");
551: }
552:
553: public Node cloneNode(boolean deep) {
554: HTMLDocumentImpl newdoc = new HTMLDocumentImpl();
555: callUserDataHandlers(this , newdoc, UserDataHandler.NODE_CLONED);
556: cloneNode(newdoc, deep);
557: return newdoc;
558: }
559:
560: /**
561: * Recursive method retreives an element by its <code>id</code> attribute.
562: * Called by {@link #getElementById(String)}.
563: *
564: * @param elementId The <code>id</code> value to look for
565: * @return The node in which to look for
566: */
567: private Element getElementById(String elementId, Node node) {
568: Node child;
569: Element result;
570:
571: child = node.getFirstChild();
572: while (child != null) {
573: if (child instanceof Element) {
574: if (elementId.equals(((Element) child)
575: .getAttribute("id")))
576: return (Element) child;
577: result = getElementById(elementId, child);
578: if (result != null)
579: return result;
580: }
581: child = child.getNextSibling();
582: }
583: return null;
584: }
585:
586: /**
587: * Called by the constructor to populate the element types list (see {@link
588: * #_elementTypesHTML}). Will be called multiple times but populate the list
589: * only the first time. Replacement for static constructor.
590: */
591: private synchronized static void populateElementTypes() {
592: // This class looks like it is due to some strange
593: // (read: inconsistent) JVM bugs.
594: // Initially all this code was placed in the static constructor,
595: // but that caused some early JVMs (1.1) to go mad, and if a
596: // class could not be found (as happened during development),
597: // the JVM would die.
598: // Bertrand Delacretaz <bdelacretaz@worldcom.ch> pointed out
599: // several configurations where HTMLAnchorElementImpl.class
600: // failed, forcing me to revert back to Class.forName().
601:
602: if (_elementTypesHTML != null)
603: return;
604: _elementTypesHTML = new Hashtable(63);
605: populateElementType("A", "HTMLAnchorElementImpl");
606: populateElementType("APPLET", "HTMLAppletElementImpl");
607: populateElementType("AREA", "HTMLAreaElementImpl");
608: populateElementType("BASE", "HTMLBaseElementImpl");
609: populateElementType("BASEFONT", "HTMLBaseFontElementImpl");
610: populateElementType("BLOCKQUOTE", "HTMLQuoteElementImpl");
611: populateElementType("BODY", "HTMLBodyElementImpl");
612: populateElementType("BR", "HTMLBRElementImpl");
613: populateElementType("BUTTON", "HTMLButtonElementImpl");
614: populateElementType("DEL", "HTMLModElementImpl");
615: populateElementType("DIR", "HTMLDirectoryElementImpl");
616: populateElementType("DIV", "HTMLDivElementImpl");
617: populateElementType("DL", "HTMLDListElementImpl");
618: populateElementType("FIELDSET", "HTMLFieldSetElementImpl");
619: populateElementType("FONT", "HTMLFontElementImpl");
620: populateElementType("FORM", "HTMLFormElementImpl");
621: populateElementType("FRAME", "HTMLFrameElementImpl");
622: populateElementType("FRAMESET", "HTMLFrameSetElementImpl");
623: populateElementType("HEAD", "HTMLHeadElementImpl");
624: populateElementType("H1", "HTMLHeadingElementImpl");
625: populateElementType("H2", "HTMLHeadingElementImpl");
626: populateElementType("H3", "HTMLHeadingElementImpl");
627: populateElementType("H4", "HTMLHeadingElementImpl");
628: populateElementType("H5", "HTMLHeadingElementImpl");
629: populateElementType("H6", "HTMLHeadingElementImpl");
630: populateElementType("HR", "HTMLHRElementImpl");
631: populateElementType("HTML", "HTMLHtmlElementImpl");
632: populateElementType("IFRAME", "HTMLIFrameElementImpl");
633: populateElementType("IMG", "HTMLImageElementImpl");
634: populateElementType("INPUT", "HTMLInputElementImpl");
635: populateElementType("INS", "HTMLModElementImpl");
636: populateElementType("ISINDEX", "HTMLIsIndexElementImpl");
637: populateElementType("LABEL", "HTMLLabelElementImpl");
638: populateElementType("LEGEND", "HTMLLegendElementImpl");
639: populateElementType("LI", "HTMLLIElementImpl");
640: populateElementType("LINK", "HTMLLinkElementImpl");
641: populateElementType("MAP", "HTMLMapElementImpl");
642: populateElementType("MENU", "HTMLMenuElementImpl");
643: populateElementType("META", "HTMLMetaElementImpl");
644: populateElementType("OBJECT", "HTMLObjectElementImpl");
645: populateElementType("OL", "HTMLOListElementImpl");
646: populateElementType("OPTGROUP", "HTMLOptGroupElementImpl");
647: populateElementType("OPTION", "HTMLOptionElementImpl");
648: populateElementType("P", "HTMLParagraphElementImpl");
649: populateElementType("PARAM", "HTMLParamElementImpl");
650: populateElementType("PRE", "HTMLPreElementImpl");
651: populateElementType("Q", "HTMLQuoteElementImpl");
652: populateElementType("SCRIPT", "HTMLScriptElementImpl");
653: populateElementType("SELECT", "HTMLSelectElementImpl");
654: populateElementType("STYLE", "HTMLStyleElementImpl");
655: populateElementType("TABLE", "HTMLTableElementImpl");
656: populateElementType("CAPTION", "HTMLTableCaptionElementImpl");
657: populateElementType("TD", "HTMLTableCellElementImpl");
658: populateElementType("TH", "HTMLTableCellElementImpl");
659: populateElementType("COL", "HTMLTableColElementImpl");
660: populateElementType("COLGROUP", "HTMLTableColElementImpl");
661: populateElementType("TR", "HTMLTableRowElementImpl");
662: populateElementType("TBODY", "HTMLTableSectionElementImpl");
663: populateElementType("THEAD", "HTMLTableSectionElementImpl");
664: populateElementType("TFOOT", "HTMLTableSectionElementImpl");
665: populateElementType("TEXTAREA", "HTMLTextAreaElementImpl");
666: populateElementType("TITLE", "HTMLTitleElementImpl");
667: populateElementType("UL", "HTMLUListElementImpl");
668: }
669:
670: private static void populateElementType(String tagName,
671: String className) {
672: try {
673: _elementTypesHTML.put(tagName, ObjectFactory
674: .findProviderClass("org.apache.html.dom."
675: + className, HTMLDocumentImpl.class
676: .getClassLoader(), true));
677: } catch (Exception except) {
678: new RuntimeException(
679: "HTM019 OpenXML Error: Could not find or execute class "
680: + className + " implementing HTML element "
681: + tagName + "\n" + className + "\t"
682: + tagName);
683: }
684: }
685:
686: }
|