001: /* Copyright (c) 2006-2007, Vladimir Nikic
002: All rights reserved.
003:
004: Redistribution and use of this software in source and binary forms,
005: with or without modification, are permitted provided that the following
006: conditions are met:
007:
008: * Redistributions of source code must retain the above
009: copyright notice, this list of conditions and the
010: following disclaimer.
011:
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the
014: following disclaimer in the documentation and/or other
015: materials provided with the distribution.
016:
017: * The name of HtmlCleaner may not be used to endorse or promote
018: products derived from this software without specific prior
019: written permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031: POSSIBILITY OF SUCH DAMAGE.
032:
033: You can contact Vladimir Nikic by sending e-mail to
034: nikic_vladimir@yahoo.com. Please include the word "HtmlCleaner" in the
035: subject line.
036: */
037:
038: package org.htmlcleaner;
039:
040: import java.io.IOException;
041: import java.io.Writer;
042: import java.util.*;
043:
044: /**
045: * <p>Pretty XML serializer - creates resulting XML with indenting lines.</p>
046: *
047: * Created by: Vladimir Nikic<br/>
048: * Date: November, 2006.
049: */
050: public class PrettyXmlSerializer extends XmlSerializer {
051:
052: private static final String INDENTATION_STRING = "\t";
053:
054: protected PrettyXmlSerializer(Writer writer, HtmlCleaner htmlCleaner) {
055: super (writer, htmlCleaner);
056: }
057:
058: protected void serialize(TagNode tagNode) throws IOException {
059: serializePrettyXml(tagNode, 0);
060: }
061:
062: /**
063: * @param level
064: * @return Appropriate indentation for the specified depth.
065: */
066: private String indent(int level) {
067: String result = "";
068: while (level > 0) {
069: result += INDENTATION_STRING;
070: level--;
071: }
072:
073: return result;
074: }
075:
076: private String getIndentedText(String content, int level) {
077: String indent = indent(level);
078: StringBuffer result = new StringBuffer(content.length());
079: StringTokenizer tokenizer = new StringTokenizer(content, "\n\r");
080:
081: while (tokenizer.hasMoreTokens()) {
082: String line = tokenizer.nextToken().trim();
083: if (!"".equals(line)) {
084: result.append(indent + line + "\n");
085: }
086: }
087:
088: return result.toString();
089: }
090:
091: private String getSingleLineOfChildren(List children) {
092: StringBuffer result = new StringBuffer();
093: Iterator childrenIt = children.iterator();
094: boolean isFirst = true;
095:
096: while (childrenIt.hasNext()) {
097: Object child = childrenIt.next();
098:
099: if (!(child instanceof ContentToken)) {
100: return null;
101: } else {
102: ContentToken contentToken = (ContentToken) child;
103: String content = contentToken.getContent().toString();
104:
105: // if first item trims it from left
106: if (isFirst) {
107: content = Utils.ltrim(content);
108: }
109:
110: // if last item trims it from right
111: if (!childrenIt.hasNext()) {
112: content = Utils.rtrim(content);
113: }
114:
115: if (content.indexOf("\n") >= 0
116: || content.indexOf("\r") >= 0) {
117: return null;
118: }
119: result.append(content);
120: }
121:
122: isFirst = false;
123: }
124:
125: return result.toString();
126: }
127:
128: private void serializePrettyXml(List nodes, int level,
129: boolean dontEscape) throws IOException {
130: Iterator childrenIt = nodes.iterator();
131: while (childrenIt.hasNext()) {
132: Object child = childrenIt.next();
133: if (child instanceof TagNode) {
134: serializePrettyXml((TagNode) child, level + 1);
135: } else if (child instanceof ContentToken) {
136: ContentToken contentToken = (ContentToken) child;
137: String content = contentToken.getContent();
138: //if ( !dontEscape ) {
139: // content = escapeXml(content).toString();
140: //} else {
141: content = content.replaceAll("]]>", "]]&");
142: //}
143: writer.write(getIndentedText(content, level + 1));
144: } else if (child instanceof CommentToken) {
145: CommentToken commentToken = (CommentToken) child;
146: String content = commentToken.getCommentedContent();
147: writer.write(getIndentedText(content, level + 1));
148: } else if (child instanceof List) {
149: serializePrettyXml((List) child, level, true);
150: }
151: }
152: }
153:
154: protected void serializePrettyXml(TagNode tagNode, int level)
155: throws IOException {
156: List tagChildren = tagNode.getChildren();
157: String indent = indent(level);
158:
159: writer.write(indent);
160: serializeOpenTag(tagNode);
161:
162: if (!tagChildren.isEmpty()) {
163: String singleLine = getSingleLineOfChildren(tagChildren);
164: boolean dontEscape = dontEscape(tagNode);
165: if (singleLine != null) {
166: if (!dontEscape(tagNode)) {
167: writer.write(escapeXml(singleLine));
168: } else {
169: writer.write(singleLine
170: .replaceAll("]]>", "]]&"));
171: }
172: } else {
173: writer.write("\n");
174: serializePrettyXml(tagChildren, level, dontEscape);
175: }
176:
177: if (singleLine == null) {
178: writer.write(indent);
179: }
180:
181: serializeEndTag(tagNode);
182: }
183: }
184:
185: }
|