001: /* Copyright (c) 2006-2007, Vladimir Nikic
002: All rights reserved.
003:
004: Redistribution and use of this software in source and binary forms,
005: with or without modification, are permitted provided that the following
006: conditions are met:
007:
008: * Redistributions of source code must retain the above
009: copyright notice, this list of conditions and the
010: following disclaimer.
011:
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the
014: following disclaimer in the documentation and/or other
015: materials provided with the distribution.
016:
017: * The name of HtmlCleaner may not be used to endorse or promote
018: products derived from this software without specific prior
019: written permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031: POSSIBILITY OF SUCH DAMAGE.
032:
033: You can contact Vladimir Nikic by sending e-mail to
034: nikic_vladimir@yahoo.com. Please include the word "HtmlCleaner" in the
035: subject line.
036: */
037:
038: package org.htmlcleaner;
039:
040: import java.io.BufferedWriter;
041: import java.io.IOException;
042: import java.io.Writer;
043: import java.util.Iterator;
044: import java.util.List;
045: import java.util.Map;
046:
047: /**
048: * <p>Abstract XML serializer - contains common logic for descendants.</p>
049: *
050: * Created by: Vladimir Nikic<br/>
051: * Date: November, 2006.
052: */
053: public abstract class XmlSerializer {
054:
055: protected final String XML_DECLARATION = "<?xml version=\"1.0\"?>";
056:
057: protected HtmlCleaner htmlCleaner;
058: protected BufferedWriter writer;
059:
060: protected XmlSerializer() {
061: }
062:
063: protected XmlSerializer(Writer writer, HtmlCleaner htmlCleaner) {
064: this .writer = new BufferedWriter(writer);
065: this .htmlCleaner = htmlCleaner;
066: }
067:
068: protected void createXml(TagNode tagNode) throws IOException {
069: if (!htmlCleaner.isOmitXmlDeclaration()) {
070: writer.write(XML_DECLARATION + "\n");
071: }
072:
073: if (!htmlCleaner.isOmitDoctypeDeclaration()) {
074: DoctypeToken doctypeToken = htmlCleaner.getDoctype();
075: if (doctypeToken != null) {
076: doctypeToken.serialize(this );
077: }
078: }
079:
080: serialize(tagNode);
081:
082: writer.flush();
083: writer.close();
084: }
085:
086: protected String escapeXml(String xmlContent) {
087: return Utils.escapeXml(xmlContent, htmlCleaner
088: .isAdvancedXmlEscape(), htmlCleaner
089: .isRecognizeUnicodeChars(), htmlCleaner
090: .isTranslateSpecialEntities());
091: }
092:
093: protected boolean dontEscape(TagNode tagNode) {
094: String tagName = tagNode.getName();
095: return htmlCleaner.isUseCdataForScriptAndStyle()
096: && ("script".equalsIgnoreCase(tagName) || "style"
097: .equalsIgnoreCase(tagName));
098: }
099:
100: protected boolean isScriptOrStyle(TagNode tagNode) {
101: String tagName = tagNode.getName();
102: return "script".equalsIgnoreCase(tagName)
103: || "style".equalsIgnoreCase(tagName);
104: }
105:
106: protected void serializeOpenTag(TagNode tagNode, boolean newLine)
107: throws IOException {
108: String tagName = tagNode.getName();
109: Map tagAtttributes = tagNode.getAttributes();
110: List tagChildren = tagNode.getChildren();
111:
112: writer.write("<" + tagName);
113: Iterator it = tagAtttributes.keySet().iterator();
114: while (it.hasNext()) {
115: String attName = (String) it.next();
116: String attValue = (String) tagAtttributes.get(attName);
117:
118: if (htmlCleaner.isOmitXmlnsAttributes()
119: && "xmlns".equals(attName)) {
120: continue;
121: }
122:
123: writer.write(" " + attName + "=\"" + escapeXml(attValue)
124: + "\"");
125: }
126:
127: if (tagChildren.size() == 0) {
128: writer.write("/>");
129: if (newLine) {
130: writer.write("\n");
131: }
132: } else if (dontEscape(tagNode)) {
133: writer.write("><![CDATA[");
134: } else {
135: writer.write(">");
136: }
137: }
138:
139: protected void serializeOpenTag(TagNode tagNode) throws IOException {
140: serializeOpenTag(tagNode, true);
141: }
142:
143: protected void serializeEndTag(TagNode tagNode, boolean newLine)
144: throws IOException {
145: String tagName = tagNode.getName();
146:
147: if (dontEscape(tagNode)) {
148: writer.write("]]>");
149: }
150:
151: writer.write("</" + tagName + ">");
152:
153: if (newLine) {
154: writer.write("\n");
155: }
156: }
157:
158: Writer getWriter() {
159: return writer;
160: }
161:
162: protected void serializeEndTag(TagNode tagNode) throws IOException {
163: serializeEndTag(tagNode, true);
164: }
165:
166: protected abstract void serialize(TagNode tagNode)
167: throws IOException;
168:
169: }
|