001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.servlet;
017:
018: import java.io.File;
019: import java.io.FileInputStream;
020: import java.io.IOException;
021: import java.io.InputStream;
022: import java.io.InputStreamReader;
023: import java.io.Reader;
024: import java.io.UnsupportedEncodingException;
025: import java.net.URL;
026: import java.net.URLDecoder;
027: import java.util.ArrayList;
028: import java.util.HashMap;
029: import java.util.Iterator;
030: import java.util.List;
031: import java.util.Map;
032: import java.util.logging.Logger;
033:
034: import javax.servlet.http.HttpServletRequest;
035:
036: import org.apache.commons.fileupload.FileItem;
037: import org.apache.commons.fileupload.disk.DiskFileItemFactory;
038: import org.apache.commons.fileupload.servlet.ServletFileUpload;
039: import org.apache.solr.core.Config;
040: import org.apache.solr.core.SolrConfig;
041: import org.apache.solr.core.SolrCore;
042: import org.apache.solr.core.SolrException;
043: import org.apache.solr.util.ContentStream;
044: import org.apache.solr.request.MultiMapSolrParams;
045: import org.apache.solr.request.ServletSolrParams;
046: import org.apache.solr.request.SolrParams;
047: import org.apache.solr.request.SolrQueryRequest;
048: import org.apache.solr.request.SolrQueryRequestBase;
049: import org.apache.solr.util.ContentStreamBase;
050:
051: public class SolrRequestParsers {
052: final Logger log = Logger.getLogger(SolrRequestParsers.class
053: .getName());
054:
055: // Should these constants be in a more public place?
056: public static final String MULTIPART = "multipart";
057: public static final String RAW = "raw";
058: public static final String SIMPLE = "simple";
059: public static final String STANDARD = "standard";
060:
061: private HashMap<String, SolrRequestParser> parsers;
062: private SolrCore core;
063: private boolean enableRemoteStreams = false;
064: private StandardRequestParser standard;
065:
066: public SolrRequestParsers(SolrCore core, Config config) {
067: this .core = core;
068:
069: // Read the configuration
070: long uploadLimitKB = SolrConfig.config
071: .getInt(
072: "requestDispatcher/requestParsers/@multipartUploadLimitInKB",
073: 2000); // 2MB default
074:
075: this .enableRemoteStreams = SolrConfig.config
076: .getBool(
077: "requestDispatcher/requestParsers/@enableRemoteStreaming",
078: false);
079:
080: MultipartRequestParser multi = new MultipartRequestParser(
081: uploadLimitKB);
082: RawRequestParser raw = new RawRequestParser();
083: standard = new StandardRequestParser(multi, raw);
084:
085: // I don't see a need to have this publically configured just yet
086: // adding it is trivial
087: parsers = new HashMap<String, SolrRequestParser>();
088: parsers.put(MULTIPART, multi);
089: parsers.put(RAW, raw);
090: parsers.put(SIMPLE, new SimpleRequestParser());
091: parsers.put(STANDARD, standard);
092: parsers.put("", standard);
093: }
094:
095: public SolrQueryRequest parse(String path, HttpServletRequest req)
096: throws Exception {
097: SolrRequestParser parser = standard;
098:
099: // TODO -- in the future, we could pick a different parser based on the request
100:
101: // Pick the parer from the request...
102: ArrayList<ContentStream> streams = new ArrayList<ContentStream>(
103: 1);
104: SolrParams params = parser.parseParamsAndFillStreams(req,
105: streams);
106: SolrQueryRequest sreq = buildRequestFrom(params, streams);
107:
108: // Handlers and loggin will want to know the path. If it contains a ':'
109: // the handler could use it for RESTfull URLs
110: sreq.getContext().put("path", path);
111: return sreq;
112: }
113:
114: SolrQueryRequest buildRequestFrom(SolrParams params,
115: List<ContentStream> streams) throws Exception {
116: // The content type will be applied to all streaming content
117: String contentType = params.get(SolrParams.STREAM_CONTENTTYPE);
118:
119: // Handle anything with a remoteURL
120: String[] strs = params.getParams(SolrParams.STREAM_URL);
121: if (strs != null) {
122: if (!enableRemoteStreams) {
123: throw new SolrException(
124: SolrException.ErrorCode.BAD_REQUEST,
125: "Remote Streaming is disabled.");
126: }
127: for (final String url : strs) {
128: ContentStreamBase stream = new ContentStreamBase.URLStream(
129: new URL(url));
130: if (contentType != null) {
131: stream.setContentType(contentType);
132: }
133: streams.add(stream);
134: }
135: }
136:
137: // Handle streaming files
138: strs = params.getParams(SolrParams.STREAM_FILE);
139: if (strs != null) {
140: if (!enableRemoteStreams) {
141: throw new SolrException(
142: SolrException.ErrorCode.BAD_REQUEST,
143: "Remote Streaming is disabled.");
144: }
145: for (final String file : strs) {
146: ContentStreamBase stream = new ContentStreamBase.FileStream(
147: new File(file));
148: if (contentType != null) {
149: stream.setContentType(contentType);
150: }
151: streams.add(stream);
152: }
153: }
154:
155: // Check for streams in the request parameters
156: strs = params.getParams(SolrParams.STREAM_BODY);
157: if (strs != null) {
158: for (final String body : strs) {
159: ContentStreamBase stream = new ContentStreamBase.StringStream(
160: body);
161: if (contentType != null) {
162: stream.setContentType(contentType);
163: }
164: streams.add(stream);
165: }
166: }
167:
168: SolrQueryRequestBase q = new SolrQueryRequestBase(core, params) {
169: };
170: if (streams != null && streams.size() > 0) {
171: q.setContentStreams(streams);
172: }
173: return q;
174: }
175:
176: /**
177: * Given a standard query string map it into solr params
178: */
179: public static MultiMapSolrParams parseQueryString(String queryString) {
180: Map<String, String[]> map = new HashMap<String, String[]>();
181: if (queryString != null && queryString.length() > 0) {
182: try {
183: for (String kv : queryString.split("&")) {
184: int idx = kv.indexOf('=');
185: if (idx > 0) {
186: String name = URLDecoder.decode(kv.substring(0,
187: idx), "UTF-8");
188: String value = URLDecoder.decode(kv
189: .substring(idx + 1), "UTF-8");
190: MultiMapSolrParams.addParam(name, value, map);
191: } else {
192: String name = URLDecoder.decode(kv, "UTF-8");
193: MultiMapSolrParams.addParam(name, "", map);
194: }
195: }
196: } catch (UnsupportedEncodingException uex) {
197: throw new SolrException(
198: SolrException.ErrorCode.SERVER_ERROR, uex);
199: }
200: }
201: return new MultiMapSolrParams(map);
202: }
203: }
204:
205: //-----------------------------------------------------------------
206: //-----------------------------------------------------------------
207:
208: // I guess we don't really even need the interface, but i'll keep it here just for kicks
209: interface SolrRequestParser {
210: public SolrParams parseParamsAndFillStreams(
211: final HttpServletRequest req,
212: ArrayList<ContentStream> streams) throws Exception;
213: }
214:
215: //-----------------------------------------------------------------
216: //-----------------------------------------------------------------
217:
218: /**
219: * The simple parser just uses the params directly
220: */
221: class SimpleRequestParser implements SolrRequestParser {
222: public SolrParams parseParamsAndFillStreams(
223: final HttpServletRequest req,
224: ArrayList<ContentStream> streams) throws Exception {
225: return new ServletSolrParams(req);
226: }
227: }
228:
229: /**
230: * Wrap an HttpServletRequest as a ContentStream
231: */
232: class HttpRequestContentStream extends ContentStreamBase {
233: private final HttpServletRequest req;
234:
235: public HttpRequestContentStream(HttpServletRequest req)
236: throws IOException {
237: this .req = req;
238:
239: contentType = req.getContentType();
240: // name = ???
241: // sourceInfo = ???
242:
243: String v = req.getHeader("Content-Length");
244: if (v != null) {
245: size = Long.valueOf(v);
246: }
247: }
248:
249: public InputStream getStream() throws IOException {
250: return req.getInputStream();
251: }
252: }
253:
254: /**
255: * Wrap a FileItem as a ContentStream
256: */
257: class FileItemContentStream extends ContentStreamBase {
258: private final FileItem item;
259:
260: public FileItemContentStream(FileItem f) {
261: item = f;
262: contentType = item.getContentType();
263: name = item.getName();
264: sourceInfo = item.getFieldName();
265: size = item.getSize();
266: }
267:
268: public InputStream getStream() throws IOException {
269: return item.getInputStream();
270: }
271: }
272:
273: /**
274: * The simple parser just uses the params directly
275: */
276: class RawRequestParser implements SolrRequestParser {
277: public SolrParams parseParamsAndFillStreams(
278: final HttpServletRequest req,
279: ArrayList<ContentStream> streams) throws Exception {
280: // The javadocs for HttpServletRequest are clear that req.getReader() should take
281: // care of any character encoding issues. BUT, there are problems while running on
282: // some servlet containers: including Tomcat 5 and resin.
283: //
284: // Rather than return req.getReader(), this uses the default ContentStreamBase method
285: // that checks for charset definitions in the ContentType.
286:
287: streams.add(new HttpRequestContentStream(req));
288: return SolrRequestParsers
289: .parseQueryString(req.getQueryString());
290: }
291: }
292:
293: /**
294: * Extract Multipart streams
295: */
296: class MultipartRequestParser implements SolrRequestParser {
297: private long uploadLimitKB;
298:
299: public MultipartRequestParser(long limit) {
300: uploadLimitKB = limit;
301: }
302:
303: public SolrParams parseParamsAndFillStreams(
304: final HttpServletRequest req,
305: ArrayList<ContentStream> streams) throws Exception {
306: if (!ServletFileUpload.isMultipartContent(req)) {
307: throw new SolrException(
308: SolrException.ErrorCode.BAD_REQUEST,
309: "Not multipart content! " + req.getContentType());
310: }
311:
312: MultiMapSolrParams params = SolrRequestParsers
313: .parseQueryString(req.getQueryString());
314:
315: // Create a factory for disk-based file items
316: DiskFileItemFactory factory = new DiskFileItemFactory();
317:
318: // Set factory constraints
319: // TODO - configure factory.setSizeThreshold(yourMaxMemorySize);
320: // TODO - configure factory.setRepository(yourTempDirectory);
321:
322: // Create a new file upload handler
323: ServletFileUpload upload = new ServletFileUpload(factory);
324: upload.setSizeMax(uploadLimitKB * 1024);
325:
326: // Parse the request
327: List items = upload.parseRequest(req);
328: Iterator iter = items.iterator();
329: while (iter.hasNext()) {
330: FileItem item = (FileItem) iter.next();
331:
332: // If its a form field, put it in our parameter map
333: if (item.isFormField()) {
334: MultiMapSolrParams.addParam(item.getFieldName(), item
335: .getString(), params.getMap());
336: }
337: // Only add it if it actually has something...
338: else if (item.getSize() > 0) {
339: streams.add(new FileItemContentStream(item));
340: }
341: }
342: return params;
343: }
344: }
345:
346: /**
347: * The default Logic
348: */
349: class StandardRequestParser implements SolrRequestParser {
350: MultipartRequestParser multipart;
351: RawRequestParser raw;
352:
353: StandardRequestParser(MultipartRequestParser multi,
354: RawRequestParser raw) {
355: this .multipart = multi;
356: this .raw = raw;
357: }
358:
359: public SolrParams parseParamsAndFillStreams(
360: final HttpServletRequest req,
361: ArrayList<ContentStream> streams) throws Exception {
362: String method = req.getMethod().toUpperCase();
363: if ("GET".equals(method)) {
364: return new ServletSolrParams(req);
365: }
366: if ("POST".equals(method)) {
367: String contentType = req.getContentType();
368: if (contentType != null) {
369: int idx = contentType.indexOf(';');
370: if (idx > 0) { // remove the charset definition "; charset=utf-8"
371: contentType = contentType.substring(0, idx);
372: }
373: if ("application/x-www-form-urlencoded"
374: .equals(contentType.toLowerCase())) {
375: return new ServletSolrParams(req); // just get the params from parameterMap
376: }
377: if (ServletFileUpload.isMultipartContent(req)) {
378: return multipart.parseParamsAndFillStreams(req,
379: streams);
380: }
381: }
382: return raw.parseParamsAndFillStreams(req, streams);
383: }
384: throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
385: "Unsuported method: " + method);
386: }
387: }
|