001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.components.serializers;
018:
import java.util.Locale;

import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.cocoon.components.serializers.encoding.HTMLEncoder;
import org.apache.cocoon.components.serializers.util.DocType;
import org.apache.cocoon.components.serializers.util.SGMLDocType;
import org.xml.sax.SAXException;
025:
026: /**
027: * <p>A serializer converting XHTML into plain old HTML.</p>
028: *
029: * <p>For configuration options of this serializer, please look at the
030: * {@link XHTMLSerializer} and {@link EncodingSerializer}.</p>
031: *
032: * <p>Any of the XHTML document type declared or used will be converted into
033: * its HTML 4.01 counterpart, and in addition to those a "compatible" doctype
034: * can be supported to exploit a couple of shortcuts into MSIE's rendering
035: * engine. The values for the <code>doctype-default</code> can then be:</p>
036: *
037: * <dl>
038: * <dt>"<code>none</code>"</dt>
039: * <dd>Not to emit any dococument type declaration.</dd>
040: * <dt>"<code>compatible</code>"</dt>
041: * <dd>The HTML 4.01 Transitional (exploiting MSIE shortcut).</dd>
042: * <dt>"<code>strict</code>"</dt>
043: * <dd>The HTML 4.01 Strict document type.</dd>
044: * <dt>"<code>loose</code>"</dt>
045: * <dd>The HTML 4.01 Transitional document type.</dd>
046: * <dt>"<code>frameset</code>"</dt>
047: * <dd>The HTML 4.01 Frameset document type.</dd>
048: * </dl>
049: *
050: * @version CVS $Id: HTMLSerializer.java 515096 2007-03-06 12:11:29Z cziegeler $
051: */
052: public class HTMLSerializer extends XHTMLSerializer {
053:
054: /** A cross-browser compatible very simple document type declaration. */
055: public static final DocType HTML401_DOCTYPE_COMPATIBLE = new SGMLDocType(
056: "HTML", "-//W3C//DTD HTML 4.01 Transitional//EN", null);
057:
058: /** A representation of the HTML 4.01 strict document type. */
059: public static final DocType HTML401_DOCTYPE_STRICT = new SGMLDocType(
060: "HTML", "-//W3C//DTD HTML 4.01//EN",
061: "http://www.w3.org/TR/html4/strict.dtd");
062:
063: /** A representation of the HTML 4.01 transitional document type. */
064: public static final DocType HTML401_DOCTYPE_TRANSITIONAL = new SGMLDocType(
065: "HTML", "-//W3C//DTD HTML 4.01 Transitional//EN",
066: "http://www.w3.org/TR/html4/loose.dtd");
067:
068: /** A representation of the HTML 4.01 frameset document type. */
069: public static final DocType HTML401_DOCTYPE_FRAMESET = new SGMLDocType(
070: "HTML", "-//W3C//DTD HTML 4.01 Frameset//EN",
071: "http://www.w3.org/TR/html4/frameset.dtd");
072:
073: /* ====================================================================== */
074:
075: private static final HTMLEncoder HTML_ENCODER = new HTMLEncoder();
076:
077: protected boolean encodeCharacters = true;
078:
079: /**
080: * Create a new instance of this <code>HTMLSerializer</code>
081: */
082: public HTMLSerializer() {
083: super (HTML_ENCODER);
084: }
085:
086: /**
087: * Configure this instance by selecting the default document type to use.
088: */
089: public void configure(Configuration conf)
090: throws ConfigurationException {
091: super .configure(conf);
092:
093: String doctype = conf.getChild("doctype-default")
094: .getValue(null);
095: if ("none".equalsIgnoreCase(doctype)) {
096: this .doctype_default = null;
097: } else if ("compatible".equalsIgnoreCase(doctype)) {
098: this .doctype_default = HTML401_DOCTYPE_COMPATIBLE;
099: } else if ("strict".equalsIgnoreCase(doctype)) {
100: this .doctype_default = HTML401_DOCTYPE_STRICT;
101: } else if ("loose".equalsIgnoreCase(doctype)) {
102: this .doctype_default = HTML401_DOCTYPE_TRANSITIONAL;
103: } else if ("frameset".equalsIgnoreCase(doctype)) {
104: this .doctype_default = HTML401_DOCTYPE_FRAMESET;
105: } else {
106: /* Default is compatible (MSIE hack) */
107: this .doctype_default = HTML401_DOCTYPE_COMPATIBLE;
108: }
109: }
110:
111: /* ====================================================================== */
112:
113: /** Empty namespaces declaration. */
114: private static final String NAMESPACES[][] = new String[0][0];
115:
116: /** Check if the URI is allowed by this serializer. */
117: private boolean checkNamespace(String nsuri) {
118: if (nsuri.length() == 0)
119: return (true);
120: if (XHTML1_NAMESPACE.equals(nsuri))
121: return (true);
122: return (false);
123: }
124:
125: /* ====================================================================== */
126:
127: /**
128: * Write the XML document header.
129: * <p>
130: * This method overrides the default <code>XMLSerializer</code>.behaviour.
131: * </p>
132: */
133: public void head() throws SAXException {
134: // NO NOTHING!
135: }
136:
137: /**
138: * Receive notification of the beginning of the document body.
139: *
140: * @param nsuri The namespace URI of the root element.
141: * @param local The local name of the root element.
142: * @param qual The fully-qualified name of the root element.
143: */
144: public void body(String nsuri, String local, String qual)
145: throws SAXException {
146: String name = local.toUpperCase();
147: if (!this .checkNamespace(nsuri)) {
148: throw new SAXException("Unsupported namespace \"" + nsuri
149: + "\" " + "for HTML root element \"" + qual + "\""
150: + this .getLocation());
151: }
152:
153: if (this .doctype == null)
154: this .doctype = this .doctype_default;
155:
156: if (XHTML1_DOCTYPE_STRICT.equals(this .doctype)) {
157: this .doctype = HTML401_DOCTYPE_STRICT;
158: } else if (XHTML1_DOCTYPE_TRANSITIONAL.equals(this .doctype)) {
159: this .doctype = HTML401_DOCTYPE_TRANSITIONAL;
160: } else if (XHTML1_DOCTYPE_FRAMESET.equals(this .doctype)) {
161: this .doctype = HTML401_DOCTYPE_FRAMESET;
162: } else if (this .doctype != null) {
163: /* The root element is uppercase, always!!! */
164: this .doctype = new SGMLDocType(this .doctype.getName()
165: .toUpperCase(), this .doctype.getPublicId(),
166: this .doctype.getSystemId());
167: }
168: super .body(XHTML1_NAMESPACE, name, name);
169: }
170:
171: /**
172: * Receive notification of the beginning of an element.
173: */
174: public void startElementImpl(String nsuri, String local,
175: String qual, String namespaces[][], String attributes[][])
176: throws SAXException {
177: String name = local.toUpperCase();
178: if (!this .checkNamespace(nsuri)) {
179: throw new SAXException("Unsupported namespace \"" + nsuri
180: + "\" " + "for HTML element \"" + qual + "\""
181: + this .getLocation());
182: }
183:
184: int length = 0;
185: for (int x = 0; x < attributes.length; x++) {
186: if (checkNamespace(attributes[x][ATTRIBUTE_NSURI]))
187: length++;
188: }
189:
190: String at[][] = new String[length][ATTRIBUTE_LENGTH];
191: length = 0;
192: for (int x = 0; x < attributes.length; x++) {
193: if (!checkNamespace(attributes[x][ATTRIBUTE_NSURI]))
194: continue;
195:
196: String at_name = attributes[x][ATTRIBUTE_LOCAL]
197: .toLowerCase();
198: at[length][ATTRIBUTE_NSURI] = XHTML1_NAMESPACE;
199: at[length][ATTRIBUTE_LOCAL] = at_name;
200: at[length][ATTRIBUTE_QNAME] = at_name;
201: at[length][ATTRIBUTE_VALUE] = attributes[x][ATTRIBUTE_VALUE];
202: length++;
203: }
204:
205: // script and style are CDATA sections by default, so no encoding
206: if ("SCRIPT".equals(name) || "STYLE".equals(name)) {
207: this .encodeCharacters = false;
208: }
209: super .startElementImpl(XHTML1_NAMESPACE, name, name,
210: NAMESPACES, at);
211: }
212:
213: /**
214: * Receive notification of the end of an element.
215: */
216: public void endElementImpl(String nsuri, String local, String qual)
217: throws SAXException {
218: this .closeElement(false);
219:
220: String name = local.toUpperCase();
221: if (!this .checkNamespace(nsuri)) {
222: throw new SAXException("Unsupported namespace \"" + nsuri
223: + "\" " + "for HTML element \"" + qual + "\""
224: + this .getLocation());
225: }
226:
227: if (name.equals("AREA"))
228: return;
229: if (name.equals("BASE"))
230: return;
231: if (name.equals("BASEFONT"))
232: return;
233: if (name.equals("BR"))
234: return;
235: if (name.equals("COL"))
236: return;
237: if (name.equals("FRAME"))
238: return;
239: if (name.equals("HR"))
240: return;
241: if (name.equals("IMG"))
242: return;
243: if (name.equals("INPUT"))
244: return;
245: if (name.equals("ISINDEX"))
246: return;
247: if (name.equals("LINK"))
248: return;
249: if (name.equals("META"))
250: return;
251: if (name.equals("PARAM"))
252: return;
253:
254: // script and style are CDATA sections by default, so no encoding
255: if ("SCRIPT".equals(name) || "STYLE".equals(name)) {
256: this .encodeCharacters = true;
257: }
258: super .endElementImpl(XHTML1_NAMESPACE, name, name);
259: }
260:
261: /**
262: * Encode and write a specific part of an array of characters.
263: */
264: protected void encode(char data[], int start, int length)
265: throws SAXException {
266: if (!this.encodeCharacters) {
267: this.write(data, start, length);
268: return;
269: }
270: super.encode(data, start, length);
271: }
272: }
|