001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.generation;
018:
019: import java.io.BufferedInputStream;
020: import java.io.ByteArrayInputStream;
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.PrintWriter;
024: import java.io.StringWriter;
025: import java.util.Map;
026: import java.util.Properties;
027:
028: import javax.servlet.http.HttpServletRequest;
029:
030: import org.apache.avalon.framework.activity.Disposable;
031: import org.apache.avalon.framework.configuration.Configurable;
032: import org.apache.avalon.framework.configuration.Configuration;
033: import org.apache.avalon.framework.configuration.ConfigurationException;
034: import org.apache.avalon.framework.parameters.Parameters;
035: import org.apache.avalon.framework.service.ServiceException;
036: import org.apache.avalon.framework.service.ServiceManager;
037: import org.apache.cocoon.ProcessingException;
038: import org.apache.cocoon.caching.CacheableProcessingComponent;
039: import org.apache.cocoon.components.source.SourceUtil;
040: import org.apache.cocoon.environment.ObjectModelHelper;
041: import org.apache.cocoon.environment.Request;
042: import org.apache.cocoon.environment.SourceResolver;
043: import org.apache.cocoon.environment.http.HttpEnvironment;
044: import org.apache.cocoon.util.PostInputStream;
045: import org.apache.cocoon.xml.XMLUtils;
046: import org.apache.cocoon.xml.dom.DOMStreamer;
047: import org.apache.excalibur.source.Source;
048: import org.apache.excalibur.source.SourceException;
049: import org.apache.excalibur.source.SourceValidity;
050: import org.apache.excalibur.xml.xpath.XPathProcessor;
051: import org.w3c.dom.NodeList;
052: import org.w3c.tidy.Tidy;
053: import org.xml.sax.SAXException;
054:
055: /**
056: * @cocoon.sitemap.component.documentation
057: * The html generator reads HTML from a source, converts it to XHTML
058: * and generates SAX Events.
059: *
060: * @cocoon.sitemap.component.name html
061: * @cocoon.sitemap.component.label content
062: * @cocoon.sitemap.component.logger sitemap.generator.html
063: * @cocoon.sitemap.component.documentation.caching
064: * Uses the last modification date of the xml document for validation
065: *
066: * @cocoon.sitemap.component.pooling.max 32
067: *
068: * @author <a href="mailto:dims@yahoo.com">Davanum Srinivas</a>
069: * @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
070: * @author <a href="mailto:barozzi@nicolaken.com">Nicola Ken Barozzi</a>
071: * @author <a href="mailto:gianugo@apache.org">Gianugo Rabellino</a>
072: *
073: * @version CVS $Id: HTMLGenerator.java 433543 2006-08-22 06:22:54Z crossley $
074: */
075: public class HTMLGenerator extends ServiceableGenerator implements
076: Configurable, CacheableProcessingComponent, Disposable {
077:
078: /** The parameter that specifies what request attribute to use, if any */
079: public static final String FORM_NAME = "form-name";
080:
081: /** The source, if coming from a file */
082: private Source inputSource;
083:
084: /** The source, if coming from the request */
085: private InputStream requestStream;
086:
087: /** XPATH expression */
088: private String xpath = null;
089:
090: /** XPath Processor */
091: private XPathProcessor processor = null;
092:
093: /** JTidy properties */
094: private Properties properties;
095:
096: public void service(ServiceManager manager) throws ServiceException {
097: super .service(manager);
098: this .processor = (XPathProcessor) this .manager
099: .lookup(XPathProcessor.ROLE);
100: }
101:
102: public void configure(Configuration config)
103: throws ConfigurationException {
104:
105: String configUrl = config.getChild("jtidy-config").getValue(
106: null);
107:
108: if (configUrl != null) {
109: org.apache.excalibur.source.SourceResolver resolver = null;
110: Source configSource = null;
111: try {
112: resolver = (org.apache.excalibur.source.SourceResolver) this .manager
113: .lookup(org.apache.excalibur.source.SourceResolver.ROLE);
114: configSource = resolver.resolveURI(configUrl);
115: if (getLogger().isDebugEnabled()) {
116: getLogger().debug(
117: "Loading configuration from "
118: + configSource.getURI());
119: }
120:
121: this .properties = new Properties();
122: this .properties.load(configSource.getInputStream());
123:
124: } catch (Exception e) {
125: getLogger().warn(
126: "Cannot load configuration from " + configUrl);
127: throw new ConfigurationException(
128: "Cannot load configuration from " + configUrl,
129: e);
130: } finally {
131: if (null != resolver) {
132: this .manager.release(resolver);
133: resolver.release(configSource);
134: }
135: }
136: }
137: }
138:
139: /**
140: * Recycle this component.
141: * All instance variables are set to <code>null</code>.
142: */
143: public void recycle() {
144: if (this .inputSource != null) {
145: this .resolver.release(this .inputSource);
146: this .inputSource = null;
147: this .requestStream = null;
148: }
149: this .xpath = null;
150: super .recycle();
151: }
152:
153: /**
154: * Setup the html generator.
155: * Try to get the last modification date of the source for caching.
156: */
157: public void setup(SourceResolver resolver, Map objectModel,
158: String src, Parameters par) throws ProcessingException,
159: SAXException, IOException {
160: super .setup(resolver, objectModel, src, par);
161:
162: Request request = ObjectModelHelper.getRequest(objectModel);
163:
164: if (src == null) {
165: // Handle this request as the StreamGenerator does (from the POST
166: // request or from a request parameter), but try to make sure
167: // that the output will be well-formed
168:
169: String contentType = request.getContentType();
170:
171: if (contentType == null) {
172: throw new IOException(
173: "Content-type was not specified for this request");
174: } else if (contentType
175: .startsWith("application/x-www-form-urlencoded")
176: || contentType.startsWith("multipart/form-data")) {
177: String requested = parameters.getParameter(FORM_NAME,
178: null);
179: if (requested == null) {
180: throw new ProcessingException(
181: "HtmlGenerator with no \"src\" parameter expects a sitemap parameter called '"
182: + FORM_NAME
183: + "' for handling form data");
184: }
185:
186: String sXml = request.getParameter(requested);
187:
188: requestStream = new ByteArrayInputStream(sXml
189: .getBytes());
190:
191: } else if (contentType.startsWith("text/plain")
192: || contentType.startsWith("text/xml")
193: || contentType.startsWith("application/xml")) {
194:
195: HttpServletRequest httpRequest = (HttpServletRequest) objectModel
196: .get(HttpEnvironment.HTTP_REQUEST_OBJECT);
197: if (httpRequest == null) {
198: throw new ProcessingException(
199: "This functionality only works in an http environment.");
200: }
201: int len = request.getContentLength();
202: if (len > 0) {
203: requestStream = new PostInputStream(httpRequest
204: .getInputStream(), len);
205: } else {
206: throw new IOException("getContentLen() == 0");
207: }
208: } else {
209: throw new IOException("Unexpected getContentType(): "
210: + request.getContentType());
211: }
212:
213: }
214:
215: xpath = request.getParameter("xpath");
216: if (xpath == null) {
217: xpath = par.getParameter("xpath", null);
218: }
219:
220: // append the request parameter to the URL if necessary
221: if (par.getParameterAsBoolean("copy-parameters", false)
222: && request.getQueryString() != null) {
223: StringBuffer query = new StringBuffer(super .source);
224: query.append(super .source.indexOf("?") == -1 ? '?' : '&');
225: query.append(request.getQueryString());
226: super .source = query.toString();
227: }
228:
229: try {
230: if (source != null) {
231: this .inputSource = resolver.resolveURI(super .source);
232: }
233: } catch (SourceException se) {
234: throw SourceUtil.handle(
235: "Unable to resolve " + super .source, se);
236: }
237: }
238:
239: /**
240: * Generate the unique key.
241: * This key must be unique inside the space of this component.
242: * This method must be invoked before the generateValidity() method.
243: *
244: * @return The generated key or <code>0</code> if the component
245: * is currently not cacheable.
246: */
247: public java.io.Serializable getKey() {
248: if (this .inputSource == null) {
249: return null;
250: }
251:
252: if (this .xpath != null) {
253: StringBuffer buffer = new StringBuffer(this .inputSource
254: .getURI());
255: buffer.append(':').append(this .xpath);
256: return buffer.toString();
257: } else {
258: return this .inputSource.getURI();
259: }
260: }
261:
262: /**
263: * Generate the validity object.
264: * Before this method can be invoked the generateKey() method
265: * must be invoked.
266: *
267: * @return The generated validity object or <code>null</code> if the
268: * component is currently not cacheable.
269: */
270: public SourceValidity getValidity() {
271: if (this .inputSource == null) {
272: return null;
273: }
274: return this .inputSource.getValidity();
275: }
276:
277: /**
278: * Generate XML data.
279: */
280: public void generate() throws IOException, SAXException,
281: ProcessingException {
282: try {
283: // Setup an instance of Tidy.
284: Tidy tidy = new Tidy();
285: tidy.setXmlOut(true);
286:
287: if (this .properties == null) {
288: tidy.setXHTML(true);
289: } else {
290: tidy.setConfigurationFromProps(this .properties);
291: }
292:
293: //Set Jtidy warnings on-off
294: tidy.setShowWarnings(getLogger().isWarnEnabled());
295: //Set Jtidy final result summary on-off
296: tidy.setQuiet(!getLogger().isInfoEnabled());
297: //Set Jtidy infos to a String (will be logged) instead of System.out
298: StringWriter stringWriter = new StringWriter();
299: PrintWriter errorWriter = new PrintWriter(stringWriter);
300: tidy.setErrout(errorWriter);
301:
302: // Extract the document using JTidy and stream it.
303:
304: if (inputSource != null)
305: requestStream = this .inputSource.getInputStream();
306:
307: org.w3c.dom.Document doc = tidy.parseDOM(
308: new BufferedInputStream(requestStream), null);
309:
310: // FIXME: Jtidy doesn't warn or strip duplicate attributes in same
311: // tag; stripping.
312: XMLUtils.stripDuplicateAttributes(doc, null);
313:
314: errorWriter.flush();
315: errorWriter.close();
316: if (getLogger().isWarnEnabled()) {
317: getLogger().warn(stringWriter.toString());
318: }
319:
320: DOMStreamer domStreamer = new DOMStreamer(
321: this .contentHandler, this .lexicalHandler);
322: this .contentHandler.startDocument();
323:
324: if (xpath != null) {
325: NodeList nl = processor.selectNodeList(doc, xpath);
326: int length = nl.getLength();
327: for (int i = 0; i < length; i++) {
328: domStreamer.stream(nl.item(i));
329: }
330: } else {
331: // If the HTML document contained a <?xml ... declaration, tidy would have recognized
332: // this as a processing instruction (with a 'null' target), giving problems further
333: // on in the pipeline. Therefore we only serialize the document element.
334: domStreamer.stream(doc.getDocumentElement());
335: }
336: this .contentHandler.endDocument();
337: } catch (SAXException e) {
338: SourceUtil.handleSAXException(this .inputSource.getURI(), e);
339: }
340: }
341:
342: public void dispose() {
343: if (this.manager != null) {
344: this.manager.release(this.processor);
345: this.manager = null;
346: }
347: this.processor = null;
348: super.dispose();
349: }
350: }
|