001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.cocoon.components.source;
019:
020: import org.apache.avalon.framework.component.Component;
021: import org.apache.avalon.framework.component.ComponentManager;
022: import org.apache.avalon.framework.logger.AbstractLogEnabled;
023: import org.apache.cocoon.ProcessingException;
024: import org.apache.cocoon.environment.ModifiableSource;
025: import org.apache.cocoon.util.ClassUtils;
026: import org.apache.excalibur.xml.sax.SAXParser;
027: import org.w3c.dom.Document;
028: import org.xml.sax.ContentHandler;
029: import org.xml.sax.InputSource;
030: import org.xml.sax.SAXException;
031:
032: import javax.xml.transform.OutputKeys;
033: import javax.xml.transform.Transformer;
034: import javax.xml.transform.TransformerFactory;
035: import javax.xml.transform.dom.DOMSource;
036: import javax.xml.transform.stream.StreamResult;
037:
038: import java.io.IOException;
039: import java.io.InputStream;
040: import java.io.StringWriter;
041: import java.lang.reflect.Method;
042: import java.util.Properties;
043:
044: /**
045: * This abstract class provides convenience methods to implement
046: * a stream based Source. Implement getInputStream(), getSystemId() and
047: * optionally override refresh(), recycle(), getLastModified() and
048: * getContentLength() to obtain a valid Source implementation.
049: * <p>
050: * This base implementation provides services to parse HTML sources
051: * (HTML is not valid XML) using JTidy, if present. The source is
052: * considered to contain HTML if <code>isHTMLContent()</code> returns
053: * true.
054: *
055: * @deprecated Use the new Avalon Excalibur Source Resolving
056: * @author <a href="mailto:sylvain@apache.org">Sylvain Wallez</a>
057: * @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
058: * @version CVS $Id: AbstractStreamSource.java 433543 2006-08-22 06:22:54Z crossley $
059: */
060: public abstract class AbstractStreamSource extends AbstractLogEnabled
061: implements ModifiableSource {
062:
063: /** Is JTidy available? */
064: private static Class jtidyClass;
065:
066: /** Properties used for converting HTML to XML */
067: private static Properties xmlProperties;
068:
069: /** The TrAX factory for serializing xml */
070: public static TransformerFactory transformerFactory = TransformerFactory
071: .newInstance();
072:
073: /**
074: * Test if JTidy is available
075: */
076: static {
077: jtidyClass = null;
078: try {
079: jtidyClass = ClassUtils.loadClass("org.w3c.tidy.Tidy");
080: } catch (ClassNotFoundException cnfe) {
081: // ignore
082: }
083: xmlProperties = new Properties();
084: xmlProperties.put(OutputKeys.METHOD, "xml");
085: xmlProperties.put(OutputKeys.OMIT_XML_DECLARATION, "no");
086: }
087:
088: /** The ComponentManager needed for streaming */
089: protected ComponentManager manager;
090:
091: /**
092: * Construct a new object
093: */
094: protected AbstractStreamSource(ComponentManager manager) {
095: this .manager = manager;
096: }
097:
098: /**
099: * Does this source contain HTML ? If true, JTidy will be used (if available) to
100: * parse the input as XML.
101: * <p>
102: * The default here is to return false. Concrete subclasses should override
103: * this if needed.
104: */
105: protected boolean isHTMLContent() {
106: return false;
107: }
108:
109: /**
110: * Return a new <code>InputSource</code> object
111: */
112: public InputSource getInputSource() throws IOException,
113: ProcessingException {
114:
115: InputStream stream = this .getInputStream();
116: if (jtidyClass != null && isHTMLContent()) {
117: try {
118: final Object xhtmlconvert = jtidyClass.newInstance();
119: Method m = jtidyClass.getMethod("setXmlOut",
120: new Class[] { Class
121: .forName("java.lang.Boolean") });
122: m.invoke(xhtmlconvert, new Object[] { Boolean.TRUE });
123: m = jtidyClass.getMethod("setXHTML",
124: new Class[] { Class
125: .forName("java.lang.Boolean") });
126: m.invoke(xhtmlconvert, new Object[] { Boolean.TRUE });
127: m = jtidyClass.getMethod("setShowWarnings",
128: new Class[] { Class
129: .forName("java.lang.Boolean") });
130: m.invoke(xhtmlconvert, new Object[] { Boolean.FALSE });
131: m = jtidyClass.getMethod("parseDOM", new Class[] {
132: Class.forName("java.io.InputStream"),
133: Class.forName("java.io.OutputStream") });
134: final Document doc = (Document) m.invoke(xhtmlconvert,
135: new Object[] { stream, null });
136: final StringWriter writer = new StringWriter();
137: final Transformer transformer;
138: transformer = transformerFactory.newTransformer();
139: transformer.setOutputProperties(xmlProperties);
140: transformer.transform(new DOMSource(doc),
141: new StreamResult(writer));
142: final String xmlstring = writer.toString();
143: InputSource newObject = new InputSource(
144: new java.io.StringReader(xmlstring));
145: newObject.setSystemId(this .getSystemId());
146: return newObject;
147: } catch (Exception ignore) {
148: // Let someone else worry about what we got . This is as before.
149: this .refresh();
150: stream = this .getInputStream();
151: }
152: }
153: InputSource newObject = new InputSource(stream);
154: newObject.setSystemId(this .getSystemId());
155: return newObject;
156: }
157:
158: /**
159: * Stream content to a content handler or to an XMLConsumer.
160: *
161: * @throws SAXException if failed to parse source document.
162: */
163: public void toSAX(ContentHandler handler) throws SAXException {
164: SAXParser parser = null;
165: try {
166: parser = (SAXParser) this .manager.lookup(SAXParser.ROLE);
167:
168: parser.parse(this .getInputSource(), handler);
169: } catch (SAXException e) {
170: // Preserve original exception
171: throw e;
172: } catch (Exception e) {
173: throw new SAXException("Exception during processing of "
174: + this .getSystemId(), e);
175: } finally {
176: if (parser != null)
177: this .manager.release((Component) parser);
178: }
179: }
180:
181: /**
182: * Override this method to set the Content Length
183: *
184: */
185: public long getContentLength() {
186: return -1;
187: }
188:
189: /**
190: * Override this method to set the Last Modification date
191: *
192: */
193: public long getLastModified() {
194: return 0;
195: }
196:
197: /**
198: * Returns <code>true</code> if <code>getInputStream()</code> succeeds.
199: * Subclasses can provide a more efficient implementation.
200: */
201: public boolean exists() {
202: try {
203: InputStream stream = getInputStream();
204: stream.close();
205: return true;
206: } catch (Exception e) {
207: return false;
208: }
209: }
210:
211: /**
212: * To be overriden in concrete subclasses if needed.
213: */
214: public void recycle() {
215: }
216:
217: /**
218: * To be overriden in concrete subclasses if needed.
219: */
220: public void refresh() {
221: }
222: }
|