001: /**
002: * org/ozone-db/xml/dom/html/HTMLDocumentImpl.java
003: *
004: * The contents of this file are subject to the OpenXML Public
005: * License Version 1.0; you may not use this file except in compliance
006: * with the License. You may obtain a copy of the License at
007: * http://www.openxml.org/license.html
008: *
009: * THIS SOFTWARE IS DISTRIBUTED ON AN "AS IS" BASIS WITHOUT WARRANTY
010: * OF ANY KIND, EITHER EXPRESSED OR IMPLIED. THE INITIAL DEVELOPER
011: * AND ALL CONTRIBUTORS SHALL NOT BE LIABLE FOR ANY DAMAGES AS A
012: * RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
013: * DERIVATIVES. SEE THE LICENSE FOR THE SPECIFIC LANGUAGE GOVERNING
014: * RIGHTS AND LIMITATIONS UNDER THE LICENSE.
015: *
016: * The Initial Developer of this code under the License is Assaf Arkin.
017: * Portions created by Assaf Arkin are Copyright (C) 1998, 1999.
018: * All Rights Reserved.
019: */package org.ozoneDB.xml.dom.html;
020:
021: import java.io.*;
022: import java.util.*;
023: import java.lang.reflect.*;
024: import org.w3c.dom.*;
025: import org.w3c.dom.html.*;
026: import org.ozoneDB.xml.*;
027: import org.ozoneDB.xml.dom.*;
028:
029: /**
030: * Implements an HTML document. Provides access to the top level element in the
031: * document, its body and title.
032: * <P>
033: * Several methods create new nodes of all basic types (comment, text, element,
034: * etc.). These methods create new nodes but do not place them in the document
035: * tree. The nodes may be placed in the document tree using {@link
036: * org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or
037: * they may be placed in some other document tree.
038: * <P>
* Note: &lt;FRAMESET&gt; documents are not supported at the moment, nor are
* direct document writing ({@link #open}, {@link #write}) or the HTTP attribute
* methods ({@link #getURL}, {@link #getCookie}).
042: *
043: *
044: * @version $Revision: 1.1 $ $Date: 2001/12/18 11:03:24 $
045: * @author <a href="mailto:arkin@trendline.co.il">Assaf Arkin</a>
046: * @see org.w3c.dom.html.HTMLDocument
047: * @see org.openxml.XMLDocument
048: */
049: public final class HTMLDocumentImpl extends DocumentImpl implements
050: HTMLDocument {
051:
052: public synchronized Element getDocumentElement() {
053: Node html;
054: Node child;
055: Node next;
056:
057: // The document element is the top-level HTML element of the HTML
058: // document. Only this element should exist at the top level.
059: // If the HTML element is found, all other elements that might
060: // precede it are placed inside the HTML element.
061: html = getFirstChild();
062: while (html != null) {
063: if (html instanceof HTMLHtmlElement) {
064: synchronized (html) {
065: child = getFirstChild();
066: while (child != null && child != html) {
067: next = child.getNextSibling();
068: html.appendChild(child);
069: child = next;
070: }
071: }
072: return (HTMLElement) html;
073: }
074: html = html.getNextSibling();
075: }
076:
077: // HTML element must exist. Create a new element and dump the
078: // entire contents of the document into it in the same order as
079: // they appear now.
080: html = new HTMLHtmlElementImpl(
081: (HTMLDocumentImpl) getOwnerDocument(), "HTML");
082: child = getFirstChild();
083: while (child != null) {
084: next = child.getNextSibling();
085: html.appendChild(child);
086: child = next;
087: }
088: appendChild(html);
089: return (HTMLElement) html;
090: }
091:
092: /**
093: * Obtains the <HEAD> element in the document, creating one if does
094: * not exist before. The <HEAD> element is the first element in the
095: * <HTML> in the document. The <HTML> element is obtained by
096: * calling {@link #getDocumentElement}. If the element does not exist, one
097: * is created.
098: * <P>
099: * Called by {@link #getTitle}, {@link #setTitle}, {@link #getBody} and
100: * {@link #setBody} to assure the document has the <HEAD> element
101: * correctly placed.
102: *
103: * @return The <HEAD> element
104: */
105: public synchronized HTMLElement getHead() {
106: Node head;
107: Node html;
108: Node child;
109: Node next;
110:
111: // Call getDocumentElement() to get the HTML element that is also the
112: // top-level element in the document. Get the first element in the
113: // document that is called HEAD. Work with that.
114: html = getDocumentElement();
115: synchronized (html) {
116: head = html.getFirstChild();
117: while (head != null && !(head instanceof HTMLHeadElement)) {
118: head = head.getNextSibling();
119: }
120: // HEAD exists but might not be first element in HTML: make sure
121: // it is and return it.
122: if (head != null) {
123: synchronized (head) {
124: child = html.getFirstChild();
125: while (child != null && child != head) {
126: next = child.getNextSibling();
127: head.insertBefore(child, head.getFirstChild());
128: child = next;
129: }
130: }
131: return (HTMLElement) head;
132: }
133:
134: // Head does not exist, create a new one, place it at the top of the
135: // HTML element and return it.
136: head = new HTMLHeadElementImpl(
137: (HTMLDocumentImpl) getOwnerDocument(), "HEAD");
138: html.insertBefore(head, html.getFirstChild());
139: }
140: return (HTMLElement) head;
141: }
142:
143: public synchronized String getTitle() {
144: HTMLElement head;
145: Node title;
146:
147: // Get the HEAD element and look for the TITLE element within.
148: // When found, make sure the TITLE is a direct child of HEAD,
149: // and return the title's text (the Text node contained within).
150: head = getHead();
151: title = head.getElementsByTagName("TITLE").item(0);
152: if (title != null) {
153: if (title.getParentNode() != head) {
154: head.appendChild(title);
155: }
156: return ((HTMLTitleElement) title).getText();
157: }
158: // No TITLE found, return an empty string.
159: return "";
160: }
161:
162: public synchronized void setTitle(String newTitle) {
163: HTMLElement head;
164: Node title;
165:
166: // Get the HEAD element and look for the TITLE element within.
167: // When found, make sure the TITLE is a direct child of HEAD,
168: // and set the title's text (the Text node contained within).
169: head = getHead();
170: title = head.getElementsByTagName("TITLE").item(0);
171: if (title != null) {
172: if (title.getParentNode() != head) {
173: head.appendChild(title);
174: }
175: ((HTMLTitleElement) title).setText(newTitle);
176: } else {
177: // No TITLE found, create a new element and place it at the end
178: // of the HEAD element.
179: title = new HTMLTitleElementImpl(
180: (HTMLDocumentImpl) getOwnerDocument(), "TITLE");
181: head.appendChild(title);
182: }
183: }
184:
185: public synchronized HTMLElement getBody() {
186: Node html;
187: Node head;
188: Node body;
189: Node child;
190: Node next;
191:
192: // Call getDocumentElement() to get the HTML element that is also the
193: // top-level element in the document. Get the first element in the
194: // document that is called BODY. Work with that.
195: html = getDocumentElement();
196: head = getHead();
197: synchronized (html) {
198: body = head.getNextSibling();
199: while (body != null && !(body instanceof HTMLBodyElement)) {
200: body = body.getNextSibling();
201: }
202: // If BODY was not found, try looking for FRAMESET instead.
203: if (body == null) {
204: body = head.getNextSibling();
205: while (body != null
206: && !(body instanceof HTMLFrameSetElement)) {
207: body = body.getNextSibling();
208: }
209: }
210:
211: // BODY/FRAMESET exists but might not be second element in HTML
212: // (after HEAD): make sure it is and return it.
213: if (body != null) {
214: synchronized (body) {
215: child = head.getNextSibling();
216: while (child != null && child != body) {
217: next = child.getNextSibling();
218: body.insertBefore(child, body.getFirstChild());
219: child = next;
220: }
221: }
222: return (HTMLElement) body;
223: }
224:
225: // BODY does not exist, create a new one, place it in the HTML element
226: // right after the HEAD and return it.
227: body = new HTMLBodyElementImpl(
228: (HTMLDocumentImpl) getOwnerDocument(), "BODY");
229: html.appendChild(body);
230: }
231: return (HTMLElement) body;
232: }
233:
234: public synchronized void setBody(HTMLElement newBody) {
235: Node html;
236: Node body;
237: Node head;
238: Node child;
239:
240: synchronized (newBody) {
241: // Call getDocumentElement() to get the HTML element that is also the
242: // top-level element in the document. Get the first element in the
243: // document that is called BODY. Work with that.
244: html = getDocumentElement();
245: head = getHead();
246: synchronized (html) {
247: body = this .getElementsByTagName("BODY").item(0);
248: // BODY exists but might not follow HEAD in HTML. If not,
249: // make it so and replce it. Start with the HEAD and make
250: // sure the BODY is the first element after the HEAD.
251: if (body != null) {
252: synchronized (body) {
253: child = head;
254: while (child != null) {
255: if (child instanceof Element) {
256: if (child != body) {
257: html.insertBefore(newBody, child);
258: } else {
259: html.replaceChild(newBody, body);
260: }
261: return;
262: }
263: child = child.getNextSibling();
264: }
265: html.appendChild(newBody);
266: }
267: return;
268: }
269: // BODY does not exist, place it in the HTML element
270: // right after the HEAD.
271: html.appendChild(newBody);
272: }
273: }
274: }
275:
276: public Element getElementById(String elementId) {
277: return getElementById(elementId, this );
278: }
279:
280: public NodeList getElementsByName(String elementName) {
281: return new HTMLElementListImpl(this , "name");
282: }
283:
284: public Element createElement(String tagName) throws DOMException {
285: Class elemClass;
286: Constructor cnst;
287:
288: // First, make sure tag name is all upper case, next get the associated
289: // element class. If no class is found, generate a generic HTML element.
290: // Do so also if an unexpected exception occurs.
291: tagName = tagName.toUpperCase();
292: elemClass = (Class) _elementTypesHTML.get(tagName);
293: if (elemClass != null) {
294: // Get the constructor for the element. The signature specifies an
295: // owner document and a tag name. Use the constructor to instantiate
296: // a new object and return it.
297: try {
298: cnst = elemClass.getConstructor(_elemClassSigHTML);
299: return (Element) cnst.newInstance(new Object[] { this ,
300: tagName });
301: } catch (Exception except) {
302: Throwable thrw;
303:
304: if (except instanceof java.lang.reflect.InvocationTargetException) {
305: thrw = ((java.lang.reflect.InvocationTargetException) except)
306: .getTargetException();
307: } else {
308: thrw = except;
309: }
310: System.out.println("Exception "
311: + thrw.getClass().getName());
312: System.out.println(thrw.getMessage());
313:
314: throw new IllegalStateException(
315: "Tag '"
316: + tagName
317: + "' associated with an Element class that failed to construct.");
318: }
319: }
320: return new HTMLElementImpl(this , tagName);
321: }
322:
/**
 * Returns the referrer of this document. This implementation always
 * returns null.
 *
 * @return Always null
 */
public String getReferrer() {
    // Information not available on server side.
    return null;
}
327:
/**
 * Returns the domain name of the server that served this document. This
 * implementation always returns null.
 *
 * @return Always null
 */
public String getDomain() {
    // Information not available on server side.
    return null;
}
332:
/**
 * Returns the URL of this document. This implementation always returns
 * null.
 *
 * @return Always null
 */
public String getURL() {
    // Information not available on server side.
    return null;
}
337:
/**
 * Returns the cookies associated with this document. This implementation
 * always returns null.
 *
 * @return Always null
 */
public String getCookie() {
    // Information not available on server side.
    return null;
}
342:
/**
 * Sets a cookie for this document. This implementation ignores the value
 * and does nothing.
 *
 * @param cookie The cookie string (ignored)
 */
public void setCookie(String cookie) {
    // Information not available on server side.
}
346:
347: public HTMLCollection getImages() {
348: // For more information see HTMLCollection#collectionMatch
349: if (_images == null) {
350: _images = new HTMLCollectionImpl(getBody(),
351: HTMLCollectionImpl.IMAGE);
352: }
353: return _images;
354: }
355:
356: public HTMLCollection getApplets() {
357: // For more information see HTMLCollection#collectionMatch
358: if (_applets == null) {
359: _applets = new HTMLCollectionImpl(getBody(),
360: HTMLCollectionImpl.APPLET);
361: }
362: return _applets;
363: }
364:
365: public HTMLCollection getLinks() {
366: // For more information see HTMLCollection#collectionMatch
367: if (_links == null) {
368: _links = new HTMLCollectionImpl(getBody(),
369: HTMLCollectionImpl.LINK);
370: }
371: return _links;
372: }
373:
374: public HTMLCollection getForms() {
375: // For more information see HTMLCollection#collectionMatch
376: if (_forms == null) {
377: _forms = new HTMLCollectionImpl(getBody(),
378: HTMLCollectionImpl.FORM);
379: }
380: return _forms;
381: }
382:
383: public HTMLCollection getAnchors() {
384: // For more information see HTMLCollection#collectionMatch
385: if (_anchors == null) {
386: _anchors = new HTMLCollectionImpl(getBody(),
387: HTMLCollectionImpl.ANCHOR);
388: }
389: return _anchors;
390: }
391:
392: public void open() {
393: // When called an in-memory is prepared. The document tree is still
394: // accessible the old way, until this writer is closed.
395: if (_writer == null) {
396: _writer = new StringWriter();
397: }
398: }
399:
400: public void close() {
401: // ! NOT IMPLEMENTED, REQUIRES PARSER !
402: if (_writer != null) {
403: _writer = null;
404: }
405: }
406:
407: public void write(String text) {
408: // Write a string into the in-memory writer.
409: if (_writer != null) {
410: _writer.write(text);
411: }
412: }
413:
414: public void writeln(String text) {
415: // Write a line into the in-memory writer.
416: if (_writer != null) {
417: _writer.write(text + "\n");
418: }
419: }
420:
421: public Object clone() {
422: HTMLDocumentImpl clone;
423:
424: clone = new HTMLDocumentImpl();
425: cloneInto(clone, true);
426: return clone;
427: }
428:
429: public Node cloneNode(boolean deep) {
430: HTMLDocumentImpl clone;
431:
432: clone = new HTMLDocumentImpl();
433: cloneInto(clone, deep);
434: return clone;
435: }
436:
437: protected Node castNewChild(Node newChild) throws DOMException {
438: // Same method appears in HTMLElementImpl and HTMLDocumentImpl.
439:
440: if (newChild == null) {
441: throw new DOMExceptionImpl(
442: DOMException.HIERARCHY_REQUEST_ERR,
443: "Child reference is null.");
444: }
445: if (!(newChild instanceof NodeImpl)) {
446: throw new DOMExceptionImpl(
447: DOMException.HIERARCHY_REQUEST_ERR,
448: "Child is not a compatible type for this node.");
449: }
450:
451: // newChild must be HTMLElement, Text, Comment, DocumentFragment or
452: // ProcessingInstruction. CDATASection and EntityReference not supported
453: // in HTML documents.
454: if (!(newChild instanceof HTMLElementImpl
455: || newChild instanceof Comment
456: || newChild instanceof Text
457: || newChild instanceof DocumentFragment || newChild instanceof ProcessingInstruction)) {
458: throw new DOMExceptionImpl(
459: DOMException.HIERARCHY_REQUEST_ERR,
460: "Child is not a compatible type for this node.");
461: }
462: return (NodeImpl) newChild;
463: }
464:
465: /**
466: * Recursive method retreives an element by its <code>id</code> attribute.
467: * Called by {@link #getElementById(String)}.
468: *
469: * @param elementId The <code>id</code> value to look for
470: * @return The node in which to look for
471: */
472: private Element getElementById(String elementId, Node node) {
473: Node child;
474: Element result;
475:
476: child = node.getFirstChild();
477: while (child != null) {
478: if (child instanceof Element) {
479: if (elementId.equals(((Element) child)
480: .getAttribute("id"))) {
481: return (Element) child;
482: }
483: result = getElementById(elementId, child);
484: if (result != null) {
485: return result;
486: }
487: }
488: child = child.getNextSibling();
489: }
490: return null;
491: }
492:
493: /**
494: * Called by the constructor to populate the element types list (see {@link
495: * #_elementTypesHTML}). Will be called multiple times but populate the list
496: * only the first time. Replacement for static constructor due to unknown
497: * problem with the static constructor.
498: */
499: private static void populateElementTypes() {
500: if (_elementTypesHTML != null) {
501: return;
502: }
503: _elementTypesHTML = new Hashtable(63);
504: _elementTypesHTML.put("A", HTMLAnchorElementImpl.class);
505: _elementTypesHTML.put("APPLET", HTMLAppletElementImpl.class);
506: _elementTypesHTML.put("AREA", HTMLAreaElementImpl.class);
507: _elementTypesHTML.put("BASE", HTMLBaseElementImpl.class);
508: _elementTypesHTML
509: .put("BASEFONT", HTMLBaseFontElementImpl.class);
510: _elementTypesHTML.put("BLOCKQUOTE",
511: HTMLBlockquoteElementImpl.class);
512: _elementTypesHTML.put("BODY", HTMLBodyElementImpl.class);
513: _elementTypesHTML.put("BR", HTMLBRElementImpl.class);
514: _elementTypesHTML.put("BUTTON", HTMLButtonElementImpl.class);
515: _elementTypesHTML.put("DEL", HTMLModElementImpl.class);
516: _elementTypesHTML.put("DIR", HTMLDirectoryElementImpl.class);
517: _elementTypesHTML.put("DIV", HTMLDivElementImpl.class);
518: _elementTypesHTML.put("DL", HTMLDListElementImpl.class);
519: _elementTypesHTML
520: .put("FIELDSET", HTMLFieldSetElementImpl.class);
521: _elementTypesHTML.put("FONT", HTMLFontElementImpl.class);
522: _elementTypesHTML.put("FORM", HTMLFormElementImpl.class);
523: _elementTypesHTML.put("FRAME", HTMLFrameElementImpl.class);
524: _elementTypesHTML
525: .put("FRAMESET", HTMLFrameSetElementImpl.class);
526: _elementTypesHTML.put("HEAD", HTMLHeadElementImpl.class);
527: _elementTypesHTML.put("H1", HTMLHeadingElementImpl.class);
528: _elementTypesHTML.put("H2", HTMLHeadingElementImpl.class);
529: _elementTypesHTML.put("H3", HTMLHeadingElementImpl.class);
530: _elementTypesHTML.put("H4", HTMLHeadingElementImpl.class);
531: _elementTypesHTML.put("H5", HTMLHeadingElementImpl.class);
532: _elementTypesHTML.put("H6", HTMLHeadingElementImpl.class);
533: _elementTypesHTML.put("HR", HTMLHRElementImpl.class);
534: _elementTypesHTML.put("HTML", HTMLHtmlElementImpl.class);
535: _elementTypesHTML.put("IFRAME", HTMLIFrameElementImpl.class);
536: _elementTypesHTML.put("IMG", HTMLImageElementImpl.class);
537: _elementTypesHTML.put("INPUT", HTMLInputElementImpl.class);
538: _elementTypesHTML.put("INS", HTMLModElementImpl.class);
539: _elementTypesHTML.put("ISINDEX", HTMLIsIndexElementImpl.class);
540: _elementTypesHTML.put("LABEL", HTMLLabelElementImpl.class);
541: _elementTypesHTML.put("LEGEND", HTMLLegendElementImpl.class);
542: _elementTypesHTML.put("LI", HTMLLIElementImpl.class);
543: _elementTypesHTML.put("LINK", HTMLLinkElementImpl.class);
544: _elementTypesHTML.put("MAP", HTMLMapElementImpl.class);
545: _elementTypesHTML.put("MENU", HTMLMenuElementImpl.class);
546: _elementTypesHTML.put("META", HTMLMetaElementImpl.class);
547: _elementTypesHTML.put("OBJECT", HTMLObjectElementImpl.class);
548: _elementTypesHTML.put("OL", HTMLOListElementImpl.class);
549: _elementTypesHTML
550: .put("OPTGROUP", HTMLOptGroupElementImpl.class);
551: _elementTypesHTML.put("OPTION", HTMLOptionElementImpl.class);
552: _elementTypesHTML.put("P", HTMLParagraphElementImpl.class);
553: _elementTypesHTML.put("PARAM", HTMLParamElementImpl.class);
554: _elementTypesHTML.put("PRE", HTMLPreElementImpl.class);
555: _elementTypesHTML.put("Q", HTMLQuoteElementImpl.class);
556: _elementTypesHTML.put("SCRIPT", HTMLScriptElementImpl.class);
557: _elementTypesHTML.put("SELECT", HTMLSelectElementImpl.class);
558: _elementTypesHTML.put("STYLE", HTMLStyleElementImpl.class);
559: _elementTypesHTML.put("TABLE", HTMLTableElementImpl.class);
560: _elementTypesHTML.put("CAPTION",
561: HTMLTableCaptionElementImpl.class);
562: _elementTypesHTML.put("TD", HTMLTableCellElementImpl.class);
563: _elementTypesHTML.put("COL", HTMLTableColElementImpl.class);
564: _elementTypesHTML
565: .put("COLGROUP", HTMLTableColElementImpl.class);
566: _elementTypesHTML.put("TR", HTMLTableRowElementImpl.class);
567: _elementTypesHTML.put("TBODY",
568: HTMLTableSectionElementImpl.class);
569: _elementTypesHTML.put("THEAD",
570: HTMLTableSectionElementImpl.class);
571: _elementTypesHTML.put("TFOOT",
572: HTMLTableSectionElementImpl.class);
573: _elementTypesHTML
574: .put("TEXTAREA", HTMLTextAreaElementImpl.class);
575: _elementTypesHTML.put("TITLE", HTMLTitleElementImpl.class);
576: _elementTypesHTML.put("UL", HTMLUListElementImpl.class);
577: }
578:
/**
 * Constructs a new HTML document through the superclass default
 * constructor and makes sure the shared table of HTML element classes has
 * been populated (see {@link #populateElementTypes}).
 */
public HTMLDocumentImpl() {
    // The superclass default constructor is invoked implicitly.
    populateElementTypes();
}
585:
/**
 * Holds the {@link HTMLCollectionImpl} object with the live collection of
 * all anchors in the document. Created on demand by {@link #getAnchors}
 * the first time it is called, then reused.
 */
private HTMLCollectionImpl _anchors;

/**
 * Holds the {@link HTMLCollectionImpl} object with the live collection of
 * all forms in the document. Created on demand by {@link #getForms} the
 * first time it is called, then reused.
 */
private HTMLCollectionImpl _forms;

/**
 * Holds the {@link HTMLCollectionImpl} object with the live collection of
 * all images in the document. Created on demand by {@link #getImages} the
 * first time it is called, then reused.
 */
private HTMLCollectionImpl _images;

/**
 * Holds the {@link HTMLCollectionImpl} object with the live collection of
 * all links in the document. Created on demand by {@link #getLinks} the
 * first time it is called, then reused.
 */
private HTMLCollectionImpl _links;

/**
 * Holds the {@link HTMLCollectionImpl} object with the live collection of
 * all applets in the document. Created on demand by {@link #getApplets}
 * the first time it is called, then reused.
 */
private HTMLCollectionImpl _applets;

/**
 * Holds the string writer used by the direct manipulation operations
 * ({@link #open}, {@link #write}, etc.) to collect new contents for the
 * document. Null while no writer is open. Parsing the collected text into
 * a document tree is not implemented (see {@link #close}).
 */
private StringWriter _writer;

/**
 * Holds names and classes of HTML element types. When an element with a
 * particular tag name is created, the matching {@link java.lang.Class}
 * is used to create the element object. For example, &lt;A&gt; matches
 * {@link HTMLAnchorElementImpl}. This static table is shared across all
 * HTML documents and is filled by {@link #populateElementTypes}. The
 * original comment contrasted it with a non-static table in
 * <code>org.openxml.dom.DocumentImpl</code>; presumably that now refers to
 * the <code>DocumentImpl</code> superclass of this class (to be confirmed).
 *
 * @see #createElement
 */
private static Hashtable _elementTypesHTML;

/**
 * Signature used to locate the constructor of HTML element classes: an
 * owner {@link HTMLDocumentImpl} followed by the tag name. This static
 * array is shared across all HTML documents.
 *
 * @see #createElement
 */
private final static Class[] _elemClassSigHTML = new Class[] {
        HTMLDocumentImpl.class, String.class };
643:
644: }
|