001: /*
002: * @(#)Node.java 1.11 2000/08/16
003: *
004: */
005:
006: package org.w3c.tidy;
007:
008: /**
009: *
010: * Node
011: *
012: * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
013: * See Tidy.java for the copyright notice.
014: * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
015: * HTML Tidy Release 4 Aug 2000</a>
016: *
017: * @author Dave Raggett <dsr@w3.org>
018: * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
019: * @version 1.0, 1999/05/22
020: * @version 1.0.1, 1999/05/29
021: * @version 1.1, 1999/06/18 Java Bean
022: * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
023: * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
024: * @version 1.4, 1999/09/04 DOM support
025: * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
026: * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
027: * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
028: * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
029: * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
030: * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
031: * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
032: */
033:
034: /*
035: Used for elements and text nodes
036: element name is null for text nodes
037: start and end are offsets into lexbuf
038: which contains the textual content of
039: all elements in the parse tree.
040:
041: parent and content allow traversal
042: of the parse tree in any direction.
043: attributes are represented as a linked
044: list of AttVal nodes which hold the
045: strings for attribute/value pairs.
046: */
047:
048: public class Node {
049:
/*
 * Node type codes stored in the "type" field of a Node.
 * The values are consecutive from 0 and toString() uses them
 * directly as indices into nodeTypeString, so the two must be
 * kept in the same order.
 */
public static final short RootNode = 0;
public static final short DocTypeTag = 1;
public static final short CommentTag = 2;
public static final short ProcInsTag = 3;
public static final short TextNode = 4;
public static final short StartTag = 5;
public static final short EndTag = 6;
public static final short StartEndTag = 7;
public static final short CDATATag = 8;
public static final short SectionTag = 9;
public static final short AspTag = 10;
public static final short JsteTag = 11;
public static final short PhpTag = 12;
063:
protected Node parent; /* enclosing node, or null */
protected Node prev; /* previous sibling, or null */
protected Node next; /* next sibling, or null */
protected Node last; /* last child (see insertNodeAtEnd), or null */
protected int start; /* start of span onto text array */
protected int end; /* end of span onto text array (exclusive: clone() copies end - start bytes) */
protected byte[] textarray; /* the text array */
protected short type; /* TextNode, StartTag, EndTag etc. */
protected boolean closed; /* true if closed by explicit end tag */
protected boolean implicit; /* true if inferred */
protected boolean linebreak; /* true if followed by a line break */
protected Dict was; /* old tag when it was changed */
protected Dict tag; /* tag's dictionary definition */
protected String element; /* name (null for text nodes) */
protected AttVal attributes; /* head of the linked list of attributes, or null */
protected Node content; /* first child, or null */
080:
/**
 * Creates an empty text node: type TextNode, no text array and a
 * zero-length span (start == end == 0).
 */
public Node() {
    this (TextNode, null, 0, 0);
}
084:
/**
 * Creates an unlinked node of the given type whose text is the span
 * from start (inclusive) to end (exclusive) of textarray.
 * The node has no element name, tag, attributes, children or siblings.
 *
 * @param type      one of the node type constants (TextNode, StartTag, ...)
 * @param textarray backing byte array for the node's text; may be null
 * @param start     offset of the first byte of the span
 * @param end       offset just past the last byte of the span
 */
public Node(short type, byte[] textarray, int start, int end) {
    /* kind and text span */
    this.type = type;
    this.textarray = textarray;
    this.start = start;
    this.end = end;

    /* not linked into any tree yet */
    parent = prev = next = last = content = null;

    /* flags */
    closed = implicit = linebreak = false;

    /* no name, dictionary entry or attributes */
    was = tag = null;
    element = null;
    attributes = null;
}
103:
104: public Node(short type, byte[] textarray, int start, int end,
105: String element, TagTable tt) {
106: this .parent = null;
107: this .prev = null;
108: this .next = null;
109: this .last = null;
110: this .start = start;
111: this .end = end;
112: this .textarray = textarray;
113: this .type = type;
114: this .closed = false;
115: this .implicit = false;
116: this .linebreak = false;
117: this .was = null;
118: this .tag = null;
119: this .element = element;
120: this .attributes = null;
121: this .content = null;
122: if (type == StartTag || type == StartEndTag || type == EndTag)
123: tt.findTag(this );
124: }
125:
126: /* used to clone heading nodes when split by an <HR> */
127: protected Object clone() {
128: Node node = new Node();
129:
130: node.parent = this .parent;
131: if (this .textarray != null) {
132: node.textarray = new byte[this .end - this .start];
133: node.start = 0;
134: node.end = this .end - this .start;
135: if (node.end > 0)
136: System.arraycopy(this .textarray, this .start,
137: node.textarray, node.start, node.end);
138: }
139: node.type = this .type;
140: node.closed = this .closed;
141: node.implicit = this .implicit;
142: node.linebreak = this .linebreak;
143: node.was = this .was;
144: node.tag = this .tag;
145: if (this .element != null)
146: node.element = this .element;
147: if (this .attributes != null)
148: node.attributes = (AttVal) this .attributes.clone();
149: return node;
150: }
151:
152: public AttVal getAttrByName(String name) {
153: AttVal attr;
154:
155: for (attr = this .attributes; attr != null; attr = attr.next) {
156: if (name != null && attr.attribute != null
157: && attr.attribute.equals(name))
158: break;
159: }
160:
161: return attr;
162: }
163:
164: /* default method for checking an element's attributes */
165: public void checkAttributes(Lexer lexer) {
166: AttVal attval;
167:
168: for (attval = this .attributes; attval != null; attval = attval.next)
169: attval.checkAttribute(lexer, this );
170: }
171:
172: public void checkUniqueAttributes(Lexer lexer) {
173: AttVal attval;
174:
175: for (attval = this .attributes; attval != null; attval = attval.next) {
176: if (attval.asp == null && attval.php == null)
177: attval.checkUniqueAttribute(lexer, this );
178: }
179: }
180:
181: public void addAttribute(String name, String value) {
182: AttVal av = new AttVal(null, null, null, null, '"', name, value);
183: av.dict = AttributeTable.getDefaultAttributeTable()
184: .findAttribute(av);
185:
186: if (this .attributes == null)
187: this .attributes = av;
188: else /* append to end of attributes */
189: {
190: AttVal here = this .attributes;
191:
192: while (here.next != null)
193: here = here.next;
194:
195: here.next = av;
196: }
197: }
198:
199: /* remove attribute from node then free it */
200: public void removeAttribute(AttVal attr) {
201: AttVal av;
202: AttVal prev = null;
203: AttVal next;
204:
205: for (av = this .attributes; av != null; av = next) {
206: next = av.next;
207:
208: if (av == attr) {
209: if (prev != null)
210: prev.next = next;
211: else
212: this .attributes = next;
213: } else
214: prev = av;
215: }
216: }
217:
218: /* find doctype element */
219: public Node findDocType() {
220: Node node;
221:
222: for (node = this .content; node != null
223: && node.type != DocTypeTag; node = node.next)
224: ;
225:
226: return node;
227: }
228:
229: public void discardDocType() {
230: Node node;
231:
232: node = findDocType();
233: if (node != null) {
234: if (node.prev != null)
235: node.prev.next = node.next;
236: else
237: node.parent.content = node.next;
238:
239: if (node.next != null)
240: node.next.prev = node.prev;
241:
242: node.next = null;
243: }
244: }
245:
246: /* remove node from markup tree and discard it */
247: public static Node discardElement(Node element) {
248: Node next = null;
249:
250: if (element != null) {
251: next = element.next;
252: removeNode(element);
253: }
254:
255: return next;
256: }
257:
258: /* insert node into markup tree */
259: public static void insertNodeAtStart(Node element, Node node) {
260: node.parent = element;
261:
262: if (element.content == null)
263: element.last = node;
264: else
265: element.content.prev = node; // AQ added 13 Apr 2000
266:
267: node.next = element.content;
268: node.prev = null;
269: element.content = node;
270: }
271:
272: /* insert node into markup tree */
273: public static void insertNodeAtEnd(Node element, Node node) {
274: node.parent = element;
275: node.prev = element.last;
276:
277: if (element.last != null)
278: element.last.next = node;
279: else
280: element.content = node;
281:
282: element.last = node;
283: }
284:
285: /*
286: insert node into markup tree in pace of element
287: which is moved to become the child of the node
288: */
289: public static void insertNodeAsParent(Node element, Node node) {
290: node.content = element;
291: node.last = element;
292: node.parent = element.parent;
293: element.parent = node;
294:
295: if (node.parent.content == element)
296: node.parent.content = node;
297:
298: if (node.parent.last == element)
299: node.parent.last = node;
300:
301: node.prev = element.prev;
302: element.prev = null;
303:
304: if (node.prev != null)
305: node.prev.next = node;
306:
307: node.next = element.next;
308: element.next = null;
309:
310: if (node.next != null)
311: node.next.prev = node;
312: }
313:
314: /* insert node into markup tree before element */
315: public static void insertNodeBeforeElement(Node element, Node node) {
316: Node parent;
317:
318: parent = element.parent;
319: node.parent = parent;
320: node.next = element;
321: node.prev = element.prev;
322: element.prev = node;
323:
324: if (node.prev != null)
325: node.prev.next = node;
326:
327: if (parent.content == element)
328: parent.content = node;
329: }
330:
331: /* insert node into markup tree after element */
332: public static void insertNodeAfterElement(Node element, Node node) {
333: Node parent;
334:
335: parent = element.parent;
336: node.parent = parent;
337:
338: // AQ - 13Jan2000 fix for parent == null
339: if (parent != null && parent.last == element)
340: parent.last = node;
341: else {
342: node.next = element.next;
343: // AQ - 13Jan2000 fix for node.next == null
344: if (node.next != null)
345: node.next.prev = node;
346: }
347:
348: element.next = node;
349: node.prev = element;
350: }
351:
352: public static void trimEmptyElement(Lexer lexer, Node element) {
353: TagTable tt = lexer.configuration.tt;
354:
355: if (lexer.canPrune(element)) {
356: if (element.type != TextNode)
357: Report.warning(lexer, element, null,
358: Report.TRIM_EMPTY_ELEMENT);
359:
360: discardElement(element);
361: } else if (element.tag == tt.tagP && element.content == null) {
362: /* replace <p></p> by <br><br> to preserve formatting */
363: Node node = lexer.inferredTag("br");
364: Node.coerceNode(lexer, element, tt.tagBr);
365: Node.insertNodeAfterElement(element, node);
366: }
367: }
368:
369: /*
370: This maps
371: <em>hello </em><strong>world</strong>
372: to
373: <em>hello</em> <strong>world</strong>
374:
375: If last child of element is a text node
376: then trim trailing white space character
377: moving it to after element's end tag.
378: */
379: public static void trimTrailingSpace(Lexer lexer, Node element,
380: Node last) {
381: byte c;
382: TagTable tt = lexer.configuration.tt;
383:
384: if (last != null && last.type == Node.TextNode
385: && last.end > last.start) {
386: c = lexer.lexbuf[last.end - 1];
387:
388: if (c == 160 || c == (byte) ' ') {
389: /* take care with <td> </td> */
390: if (element.tag == tt.tagTd || element.tag == tt.tagTh) {
391: if (last.end > last.start + 1)
392: last.end -= 1;
393: } else {
394: last.end -= 1;
395:
396: if (((element.tag.model & Dict.CM_INLINE) != 0)
397: && !((element.tag.model & Dict.CM_FIELD) != 0))
398: lexer.insertspace = true;
399:
400: /* if empty string then delete from parse tree */
401: if (last.start == last.end)
402: trimEmptyElement(lexer, last);
403: }
404: }
405: }
406: }
407:
408: /*
409: This maps
410: <p>hello<em> world</em>
411: to
412: <p>hello <em>world</em>
413:
414: Trims initial space, by moving it before the
415: start tag, or if this element is the first in
416: parent's content, then by discarding the space
417: */
418: public static void trimInitialSpace(Lexer lexer, Node element,
419: Node text) {
420: Node prev, node;
421:
422: // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
423: // 31-Oct-00.
424: if (text.type == TextNode
425: && text.textarray[text.start] == (byte) ' '
426: && (text.start < text.end)) {
427: if (((element.tag.model & Dict.CM_INLINE) != 0)
428: && !((element.tag.model & Dict.CM_FIELD) != 0)
429: && element.parent.content != element) {
430: prev = element.prev;
431:
432: if (prev != null && prev.type == TextNode) {
433: if (prev.textarray[prev.end - 1] != (byte) ' ')
434: prev.textarray[prev.end++] = (byte) ' ';
435:
436: ++element.start;
437: } else /* create new node */
438: {
439: node = lexer.newNode();
440: // Local fix for bug 228486 (GLP). This handles the case
441: // where we need to create a preceeding text node but there are
442: // no "slots" in textarray that we can steal from the current
443: // element. Therefore, we create a new textarray containing
444: // just the blank. When Tidy is fixed, this should be removed.
445: if (element.start >= element.end) {
446: node.start = 0;
447: node.end = 1;
448: node.textarray = new byte[1];
449: } else {
450: node.start = element.start++;
451: node.end = element.start;
452: node.textarray = element.textarray;
453: }
454: node.textarray[node.start] = (byte) ' ';
455: node.prev = prev;
456: if (prev != null)
457: prev.next = node;
458: node.next = element;
459: element.prev = node;
460: node.parent = element.parent;
461: }
462: }
463:
464: /* discard the space in current node */
465: ++text.start;
466: }
467: }
468:
469: /*
470: Move initial and trailing space out.
471: This routine maps:
472:
473: hello<em> world</em>
474: to
475: hello <em>world</em>
476: and
477: <em>hello </em><strong>world</strong>
478: to
479: <em>hello</em> <strong>world</strong>
480: */
481: public static void trimSpaces(Lexer lexer, Node element) {
482: Node text = element.content;
483: TagTable tt = lexer.configuration.tt;
484:
485: if (text != null && text.type == Node.TextNode
486: && element.tag != tt.tagPre)
487: trimInitialSpace(lexer, element, text);
488:
489: text = element.last;
490:
491: if (text != null && text.type == Node.TextNode)
492: trimTrailingSpace(lexer, element, text);
493: }
494:
495: public boolean isDescendantOf(Dict tag) {
496: Node parent;
497:
498: for (parent = this .parent; parent != null; parent = parent.parent) {
499: if (parent.tag == tag)
500: return true;
501: }
502:
503: return false;
504: }
505:
506: /*
507: the doctype has been found after other tags,
508: and needs moving to before the html element
509: */
510: public static void insertDocType(Lexer lexer, Node element,
511: Node doctype) {
512: TagTable tt = lexer.configuration.tt;
513:
514: Report.warning(lexer, element, doctype,
515: Report.DOCTYPE_AFTER_TAGS);
516:
517: while (element.tag != tt.tagHtml)
518: element = element.parent;
519:
520: insertNodeBeforeElement(element, doctype);
521: }
522:
523: public Node findBody(TagTable tt) {
524: Node node;
525:
526: node = this .content;
527:
528: while (node != null && node.tag != tt.tagHtml)
529: node = node.next;
530:
531: if (node == null)
532: return null;
533:
534: node = node.content;
535:
536: while (node != null && node.tag != tt.tagBody)
537: node = node.next;
538:
539: return node;
540: }
541:
542: public boolean isElement() {
543: return (this .type == StartTag || this .type == StartEndTag ? true
544: : false);
545: }
546:
547: /*
548: unexpected content in table row is moved to just before
549: the table in accordance with Netscape and IE. This code
550: assumes that node hasn't been inserted into the row.
551: */
552: public static void moveBeforeTable(Node row, Node node, TagTable tt) {
553: Node table;
554:
555: /* first find the table element */
556: for (table = row.parent; table != null; table = table.parent) {
557: if (table.tag == tt.tagTable) {
558: if (table.parent.content == table)
559: table.parent.content = node;
560:
561: node.prev = table.prev;
562: node.next = table;
563: table.prev = node;
564: node.parent = table.parent;
565:
566: if (node.prev != null)
567: node.prev.next = node;
568:
569: break;
570: }
571: }
572: }
573:
574: /*
575: if a table row is empty then insert an empty cell
576: this practice is consistent with browser behavior
577: and avoids potential problems with row spanning cells
578: */
579: public static void fixEmptyRow(Lexer lexer, Node row) {
580: Node cell;
581:
582: if (row.content == null) {
583: cell = lexer.inferredTag("td");
584: insertNodeAtEnd(row, cell);
585: Report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
586: }
587: }
588:
589: public static void coerceNode(Lexer lexer, Node node, Dict tag) {
590: Node tmp = lexer.inferredTag(tag.name);
591: Report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
592: node.was = node.tag;
593: node.tag = tag;
594: node.type = StartTag;
595: node.implicit = true;
596: node.element = tag.name;
597: }
598:
599: /* extract a node and its children from a markup tree */
600: public static void removeNode(Node node) {
601: if (node.prev != null)
602: node.prev.next = node.next;
603:
604: if (node.next != null)
605: node.next.prev = node.prev;
606:
607: if (node.parent != null) {
608: if (node.parent.content == node)
609: node.parent.content = node.next;
610:
611: if (node.parent.last == node)
612: node.parent.last = node.prev;
613: }
614:
615: node.parent = node.prev = node.next = null;
616: }
617:
618: public static boolean insertMisc(Node element, Node node) {
619: if (node.type == CommentTag || node.type == ProcInsTag
620: || node.type == CDATATag || node.type == SectionTag
621: || node.type == AspTag || node.type == JsteTag
622: || node.type == PhpTag) {
623: insertNodeAtEnd(element, node);
624: return true;
625: }
626:
627: return false;
628: }
629:
630: /*
631: used to determine how attributes
632: without values should be printed
633: this was introduced to deal with
634: user defined tags e.g. Cold Fusion
635: */
636: public static boolean isNewNode(Node node) {
637: if (node != null && node.tag != null) {
638: return ((node.tag.model & Dict.CM_NEW) != 0);
639: }
640:
641: return true;
642: }
643:
644: public boolean hasOneChild() {
645: return (this .content != null && this .content.next == null);
646: }
647:
648: /* find html element */
649: public Node findHTML(TagTable tt) {
650: Node node;
651:
652: for (node = this .content; node != null
653: && node.tag != tt.tagHtml; node = node.next)
654: ;
655:
656: return node;
657: }
658:
659: public Node findHEAD(TagTable tt) {
660: Node node;
661:
662: node = this .findHTML(tt);
663:
664: if (node != null) {
665: for (node = node.content; node != null
666: && node.tag != tt.tagHead; node = node.next)
667: ;
668: }
669:
670: return node;
671: }
672:
673: public boolean checkNodeIntegrity() {
674: Node child;
675: boolean found = false;
676:
677: if (this .prev != null) {
678: if (this .prev.next != this )
679: return false;
680: }
681:
682: if (this .next != null) {
683: if (this .next.prev != this )
684: return false;
685: }
686:
687: if (this .parent != null) {
688: if (this .prev == null && this .parent.content != this )
689: return false;
690:
691: if (this .next == null && this .parent.last != this )
692: return false;
693:
694: for (child = this .parent.content; child != null; child = child.next)
695: if (child == this ) {
696: found = true;
697: break;
698: }
699:
700: if (!found)
701: return false;
702: }
703:
704: for (child = this .content; child != null; child = child.next)
705: if (!child.checkNodeIntegrity())
706: return false;
707:
708: return true;
709: }
710:
711: /*
712: Add class="foo" to node
713: */
714: public static void addClass(Node node, String classname) {
715: AttVal classattr = node.getAttrByName("class");
716:
717: /*
718: if there already is a class attribute
719: then append class name after a space
720: */
721: if (classattr != null) {
722: classattr.value = classattr.value + " " + classname;
723: } else
724: /* create new class attribute */
725: node.addAttribute("class", classname);
726: }
727:
728: /* --------------------- DEBUG -------------------------- */
729:
730: private static final String[] nodeTypeString = { "RootNode",
731: "DocTypeTag", "CommentTag", "ProcInsTag", "TextNode",
732: "StartTag", "EndTag", "StartEndTag", "SectionTag",
733: "AspTag", "PhpTag" };
734:
735: public String toString() {
736: String s = "";
737: Node n = this ;
738:
739: while (n != null) {
740: s += "[Node type=";
741: s += nodeTypeString[n.type];
742: s += ",element=";
743: if (n.element != null)
744: s += n.element;
745: else
746: s += "null";
747: if (n.type == TextNode || n.type == CommentTag
748: || n.type == ProcInsTag) {
749: s += ",text=";
750: if (n.textarray != null && n.start <= n.end) {
751: s += "\"";
752: s += Lexer.getString(n.textarray, n.start, n.end
753: - n.start);
754: s += "\"";
755: } else {
756: s += "null";
757: }
758: }
759: s += ",content=";
760: if (n.content != null)
761: s += n.content.toString();
762: else
763: s += "null";
764: s += "]";
765: if (n.next != null)
766: s += ",";
767: n = n.next;
768: }
769: return s;
770: }
771:
772: /* --------------------- END DEBUG ---------------------- */
773:
774: /* --------------------- DOM ---------------------------- */
775:
776: protected org.w3c.dom.Node adapter = null;
777:
778: protected org.w3c.dom.Node getAdapter() {
779: if (adapter == null) {
780: switch (this .type) {
781: case RootNode:
782: adapter = new DOMDocumentImpl(this );
783: break;
784: case StartTag:
785: case StartEndTag:
786: adapter = new DOMElementImpl(this );
787: break;
788: case DocTypeTag:
789: adapter = new DOMDocumentTypeImpl(this );
790: break;
791: case CommentTag:
792: adapter = new DOMCommentImpl(this );
793: break;
794: case TextNode:
795: adapter = new DOMTextImpl(this );
796: break;
797: case CDATATag:
798: adapter = new DOMCDATASectionImpl(this );
799: break;
800: case ProcInsTag:
801: adapter = new DOMProcessingInstructionImpl(this );
802: break;
803: default:
804: adapter = new DOMNodeImpl(this );
805: }
806: }
807: return adapter;
808: }
809:
810: protected Node cloneNode(boolean deep) {
811: Node node = (Node) this .clone();
812: if (deep) {
813: Node child;
814: Node newChild;
815: for (child = this .content; child != null; child = child.next) {
816: newChild = child.cloneNode(deep);
817: insertNodeAtEnd(node, newChild);
818: }
819: }
820: return node;
821: }
822:
/**
 * Overwrites this node's type code. No other field is touched; in
 * particular an adapter already cached by getAdapter() is kept.
 *
 * @param newType one of the node type constants
 */
protected void setType(short newType) {
    this .type = newType;
}
826:
827: /* --------------------- END DOM ------------------------ */
828:
829: }
|