001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.generation;
018:
019: import org.apache.cocoon.ProcessingException;
020: import org.apache.cocoon.ResourceNotFoundException;
021: import org.apache.cocoon.environment.ObjectModelHelper;
022: import org.apache.cocoon.environment.Request;
023: import org.apache.cocoon.environment.http.HttpEnvironment;
024: import org.apache.cocoon.servlet.multipart.Part;
025: import org.apache.cocoon.util.PostInputStream;
026: import org.apache.excalibur.xml.sax.SAXParser;
027: import org.xml.sax.InputSource;
028: import org.xml.sax.SAXException;
029:
030: import javax.servlet.http.HttpServletRequest;
031:
032: import java.io.IOException;
033: import java.io.InputStreamReader;
034: import java.io.Reader;
035: import java.io.StringReader;
036:
037: /**
038: * @cocoon.sitemap.component.documentation
039: * The <code>StreamGenerator</code> is a class that reads XML from a
040: * request InputStream and generates SAX Events.
041: *
042: * @cocoon.sitemap.component.name stream
043: * @cocoon.sitemap.component.label content
044: * @cocoon.sitemap.component.logger sitemap.generator.stream
045: *
046: * @cocoon.sitemap.component.pooling.max 16
047: *
048: * For the POST requests with a mimetype of application/x-www-form-urlencoded,
049: * or multipart/form-data the xml data is expected to be associated with the
050: * sitemap parameter 'form-name'.
051: *
052: * For the POST requests with mimetypes: text/plain, text/xml,
053: * application/xhtml+xml, application/xml the xml data is in the body of the POST request and
054: * its length is specified by the value returned by getContentLength()
055: * method. The StreamGenerator uses helper
056: * org.apache.cocoon.util.PostInputStream class for InputStream
057: * reading operations. At the time that Parser is reading the data
058: * out of InputStream - Parser has no knowledge about the length of
059: * data to be read. The only way to signal to the Parser that all
060: * data was read from the InputStream is to control reading operation-
061: * PostInputStream--and to return to the requestor '-1' when the
062: * number of bytes read is equal to the getContentLength() value.
063: *
064: * @author <a href="mailto:Kinga_Dziembowski@hp.com">Kinga Dziembowski</a>
065: * @version CVS $Id: StreamGenerator.java 433543 2006-08-22 06:22:54Z crossley $
066: */
067: public class StreamGenerator extends ServiceableGenerator {
068:
069: /** The parameter holding the name associated with the xml data **/
070: public static final String FORM_NAME = "form-name";
071:
072: /** The input source */
073: private InputSource inputSource;
074:
075: /**
076: * Recycle this component.
077: * All instance variables are set to <code>null</code>.
078: */
079: public void recycle() {
080: super .recycle();
081: this .inputSource = null;
082: }
083:
084: /**
085: * Generate XML data out of request InputStream.
086: */
087: public void generate() throws IOException, SAXException,
088: ProcessingException {
089: SAXParser parser = null;
090: int len = 0;
091: String contentType = null;
092:
093: Request request = ObjectModelHelper
094: .getRequest(this .objectModel);
095: try {
096: contentType = request.getContentType();
097: if (contentType == null) {
098: contentType = parameters.getParameter(
099: "defaultContentType", null);
100: if (getLogger().isDebugEnabled()) {
101: getLogger().debug(
102: "no Content-Type header - using contentType parameter: "
103: + contentType);
104: }
105: if (contentType == null) {
106: throw new IOException(
107: "both Content-Type header and defaultContentType parameter are not set");
108: }
109: }
110: if (contentType
111: .startsWith("application/x-www-form-urlencoded")
112: || contentType.startsWith("multipart/form-data")) {
113: String parameter = parameters.getParameter(FORM_NAME,
114: null);
115: if (parameter == null) {
116: throw new ProcessingException(
117: "StreamGenerator expects a sitemap parameter called '"
118: + FORM_NAME
119: + "' for handling form data");
120: }
121: Object xmlObject = request.get(parameter);
122: Reader xmlReader = null;
123: if (xmlObject instanceof String) {
124: xmlReader = new StringReader((String) xmlObject);
125: } else if (xmlObject instanceof Part) {
126: xmlReader = new InputStreamReader(
127: ((Part) xmlObject).getInputStream());
128: } else {
129: throw new ProcessingException(
130: "Unknown request object encountered named "
131: + parameter + " : " + xmlObject);
132: }
133: inputSource = new InputSource(xmlReader);
134: } else if (contentType.startsWith("text/plain")
135: || contentType.startsWith("text/xml")
136: || contentType.startsWith("application/xhtml+xml")
137: || contentType.startsWith("application/xml")) {
138:
139: HttpServletRequest httpRequest = (HttpServletRequest) objectModel
140: .get(HttpEnvironment.HTTP_REQUEST_OBJECT);
141: if (httpRequest == null) {
142: throw new ProcessingException(
143: "This feature is only available in an http environment.");
144: }
145: len = request.getContentLength();
146: if (len > 0) {
147: PostInputStream anStream = new PostInputStream(
148: httpRequest.getInputStream(), len);
149: inputSource = new InputSource(anStream);
150: } else {
151: throw new IOException("getContentLen() == 0");
152: }
153: } else {
154: throw new IOException("Unexpected getContentType(): "
155: + request.getContentType());
156: }
157:
158: if (getLogger().isDebugEnabled()) {
159: getLogger().debug(
160: "processing stream ContentType=" + contentType
161: + " ContentLen=" + len);
162: }
163: String charset = getCharacterEncoding(request, contentType);
164: if (charset != null) {
165: this .inputSource.setEncoding(charset);
166: }
167: parser = (SAXParser) this .manager.lookup(SAXParser.ROLE);
168: parser.parse(this .inputSource, super .xmlConsumer);
169: } catch (IOException e) {
170: getLogger().error("StreamGenerator.generate()", e);
171: throw new ResourceNotFoundException(
172: "StreamGenerator could not find resource", e);
173: } catch (SAXException e) {
174: getLogger().error("StreamGenerator.generate()", e);
175: throw (e);
176: } catch (Exception e) {
177: getLogger().error("Could not get parser", e);
178: throw new ProcessingException(
179: "Exception in StreamGenerator.generate()", e);
180: } finally {
181: this .manager.release(parser);
182: }
183: }
184:
185: /**
186: * Content type HTTP header can contains character encodinf info
187: * for ex. Content-Type: text/xml; charset=UTF-8
188: * If the servlet is following spec 2.3 and higher the servlet API can be used to retrieve character encoding part of
189: * Content-Type header. Some containers can choose to not unpack charset info - the spec is not strong about it.
190: * in any case this method can be used as a latest resource to retrieve the passed charset value.
191: * <code>null</code> is returned.
192: * It is very common mistake to send : Content-Type: text/xml; charset="UTF-8".
193: * Some containers are not filtering this mistake and the processing results in exception..
194: * The getCharacterEncoding() compensates for above mistake.
195: *
196: * @param contentType value associated with Content-Type HTTP header.
197: */
198: public String getCharacterEncoding(Request req, String contentType) {
199: String charencoding = null;
200: String charset = "charset=";
201: if (contentType == null) {
202: return null;
203: }
204: int idx = contentType.indexOf(charset);
205: if (idx == -1) {
206: return null;
207: }
208: try {
209: charencoding = req.getCharacterEncoding();
210:
211: if (charencoding != null) {
212: getLogger().debug(
213: "charset from container: " + charencoding);
214: charencoding = charencoding.trim();
215: if ((charencoding.length() > 2)
216: && (charencoding.startsWith("\""))
217: && (charencoding.endsWith("\""))) {
218: charencoding = charencoding.substring(1,
219: charencoding.length() - 1);
220: }
221: getLogger()
222: .debug(
223: "charset from container clean: "
224: + charencoding);
225: return charencoding;
226: } else {
227: return extractCharset(contentType, idx);
228: }
229: } catch (Throwable e) {
230: // We will be there if the container do not implement getCharacterEncoding() method
231: return extractCharset(contentType, idx);
232: }
233: }
234:
235: protected String extractCharset(String contentType, int idx) {
236: String charencoding = null;
237: String charset = "charset=";
238:
239: getLogger().debug("charset from extractCharset");
240: charencoding = contentType.substring(idx + charset.length());
241: int idxEnd = charencoding.indexOf(";");
242: if (idxEnd != -1) {
243: charencoding = charencoding.substring(0, idxEnd);
244: }
245: charencoding = charencoding.trim();
246: if ((charencoding.length() > 2)
247: && (charencoding.startsWith("\""))
248: && (charencoding.endsWith("\""))) {
249: charencoding = charencoding.substring(1, charencoding
250: .length() - 1);
251: }
252: getLogger().debug(
253: "charset from extractCharset: " + charencoding);
254: return charencoding.trim();
255:
256: }
257: }
|