001: /*
002: * regain - A file search engine providing plenty of formats
003: * Copyright (C) 2004 Til Schneider
004: *
005: * This library is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU Lesser General Public
007: * License as published by the Free Software Foundation; either
008: * version 2.1 of the License, or (at your option) any later version.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013: * Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General Public
016: * License along with this library; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018: *
019: * Contact: Til Schneider, info@murfman.de
020: */
021: package net.sf.regain.search;
022:
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Locale;
029:
030: import net.sf.regain.RegainException;
031: import net.sf.regain.RegainToolkit;
032: import net.sf.regain.search.access.SearchAccessController;
033: import net.sf.regain.search.config.DefaultSearchConfigFactory;
034: import net.sf.regain.search.config.IndexConfig;
035: import net.sf.regain.search.config.SearchConfig;
036: import net.sf.regain.search.config.SearchConfigFactory;
037: import net.sf.regain.search.results.MultipleSearchResults;
038: import net.sf.regain.search.results.SearchResults;
039: import net.sf.regain.search.results.SingleSearchResults;
040: import net.sf.regain.util.sharedtag.PageRequest;
041: import net.sf.regain.util.sharedtag.PageResponse;
042:
043: import org.apache.lucene.index.Term;
044: import org.apache.lucene.search.Hits;
045: import org.apache.lucene.search.Query;
046: import org.apache.lucene.search.TermQuery;
047:
048: /**
049: * A toolkit for the search JSPs containing helper methods.
050: *
051: * @author Til Schneider, www.murfman.de
052: */
053: public class SearchToolkit {
054:
055: /** The name of the page context attribute that holds the search query. */
056: private static final String SEARCH_QUERY_CONTEXT_ATTR_NAME = "SearchQuery";
057:
058: /** The name of the page context attribute that holds the SearchResults. */
059: private static final String SEARCH_RESULTS_ATTR_NAME = "SearchResults";
060:
061: /** The name of the page context attribute that holds the IndexConfig array. */
062: private static final String INDEX_CONFIG_CONTEXT_ARRAY_ATTR_NAME = "IndexConfigArr";
063:
064: /** The prefix for request parameters that contain additional field values. */
065: private static final String FIELD_PREFIX = "field.";
066:
067: /** The configuration of the search mask. */
068: private static SearchConfig mConfig;
069:
070: /** Holds for an extension the mime type. */
071: private static HashMap mMimeTypeHash;
072:
073: /**
074: * Gets the IndexConfig array from the PageContext. It contains the
075: * configurations of all indexes the search query is searching on.
076: * <p>
077: * If there is no IndexConfig array in the PageContext it is put in the
078: * PageContext, so the next call will find it.
079: *
080: * @param request The page request where the IndexConfig array will be taken
081: * from or put to.
082: * @return The IndexConfig array for the page the context is for.
083: * @throws RegainException If there is no IndexConfig for the specified index.
084: */
085: public static IndexConfig[] getIndexConfigArr(PageRequest request)
086: throws RegainException {
087: IndexConfig[] configArr = (IndexConfig[]) request
088: .getContextAttribute(INDEX_CONFIG_CONTEXT_ARRAY_ATTR_NAME);
089: if (configArr == null) {
090: // Load the config (if not yet done)
091: loadConfiguration(request);
092:
093: // Get the names of the indexes
094: String[] indexNameArr = request.getParameters("index");
095: if (indexNameArr == null) {
096: // There was no index specified -> Check whether we have default indexes
097: // defined
098: indexNameArr = mConfig.getDefaultIndexNameArr();
099: if (indexNameArr == null) {
100: throw new RegainException(
101: "Request parameter 'index' not specified and "
102: + "no default index configured");
103: }
104: }
105:
106: // Get the configurations for these indexes
107: configArr = new IndexConfig[indexNameArr.length];
108: for (int i = 0; i < indexNameArr.length; i++) {
109: configArr[i] = mConfig.getIndexConfig(indexNameArr[i]);
110: if (configArr[i] == null) {
111: throw new RegainException(
112: "The configuration does not contain the index '"
113: + indexNameArr[i] + "'");
114: }
115: }
116:
117: // Store the IndexConfig in the page context
118: request.setContextAttribute(
119: INDEX_CONFIG_CONTEXT_ARRAY_ATTR_NAME, configArr);
120: }
121: return configArr;
122: }
123:
124: /**
125: * Gets the search query.
126: *
127: * @param request The request to get the query from.
128: * @return The search query.
129: * @throws RegainException If getting the query failed.
130: */
131: public static String getSearchQuery(PageRequest request)
132: throws RegainException {
133: String queryString = (String) request
134: .getContextAttribute(SEARCH_QUERY_CONTEXT_ATTR_NAME);
135: if (queryString == null) {
136: // Get the query parameter
137: StringBuffer query = new StringBuffer();
138: String[] queryParamArr = request
139: .getParametersNotNull("query");
140: for (int i = 0; i < queryParamArr.length; i++) {
141: if (i != 0) {
142: query.append(" ");
143: }
144: query.append(queryParamArr[i]);
145: }
146:
147: // Append the additional fields to the query
148: Enumeration enm = request.getParameterNames();
149: while (enm.hasMoreElements()) {
150: String paramName = (String) enm.nextElement();
151: if (paramName.startsWith(FIELD_PREFIX)) {
152: // This is an additional field -> Append it to the query
153: String fieldName = paramName.substring(FIELD_PREFIX
154: .length());
155: String fieldValue = request.getParameter(paramName);
156:
157: if (fieldValue != null) {
158: fieldValue = fieldValue.trim();
159: if (fieldValue.length() != 0) {
160: query.append(" " + fieldName + ":\""
161: + fieldValue + "\"");
162: }
163: }
164: }
165: }
166:
167: queryString = query.toString().trim();
168: request.setContextAttribute(SEARCH_QUERY_CONTEXT_ATTR_NAME,
169: queryString);
170: }
171:
172: return queryString;
173: }
174:
175: /**
176: * Gets the SearchResults from the PageContext.
177: * <p>
178: * If there is no SearchResults in the PageContext it is created and put in the
179: * PageContext, so the next call will find it.
180: *
181: * @param request The page request where the SearchResults will be taken
182: * from or put to.
183: * @return The SearchResults for the page the context is for.
184: * @throws RegainException If the SearchResults could not be created.
185: * @see SearchResults
186: */
187: public static SearchResults getSearchResults(PageRequest request)
188: throws RegainException {
189: SearchResults results = (SearchResults) request
190: .getContextAttribute(SEARCH_RESULTS_ATTR_NAME);
191: if (results == null) {
192: // Get the index configurations
193: IndexConfig[] indexConfigArr = getIndexConfigArr(request);
194:
195: if (indexConfigArr.length == 1) {
196: results = createSingleSearchResults(indexConfigArr[0],
197: request);
198: } else {
199: SingleSearchResults[] childResultsArr = new SingleSearchResults[indexConfigArr.length];
200: for (int i = 0; i < childResultsArr.length; i++) {
201: childResultsArr[i] = createSingleSearchResults(
202: indexConfigArr[i], request);
203: }
204: results = new MultipleSearchResults(childResultsArr);
205: }
206:
207: // Store the SearchResults in the page context
208: request.setContextAttribute(SEARCH_RESULTS_ATTR_NAME,
209: results);
210: }
211:
212: return results;
213: }
214:
215: /**
216: * Gets the SingleSearchResults from one index.
217: *
218: * @param indexConfig The config of the index to search in.
219: * @param request The request that initiated the search.
220: * @return The SingleSearchResults for the index.
221: * @throws RegainException If searching failed.
222: */
223: private static SingleSearchResults createSingleSearchResults(
224: IndexConfig indexConfig, PageRequest request)
225: throws RegainException {
226: // Get the query
227: String query = getSearchQuery(request);
228:
229: // Get the groups the current user has reading rights for
230: String[] groupArr = null;
231: SearchAccessController accessController = indexConfig
232: .getSearchAccessController();
233: if (accessController != null) {
234: groupArr = accessController.getUserGroups(request);
235:
236: // Check the Group array
237: RegainToolkit.checkGroupArray(accessController, groupArr);
238: }
239:
240: return new SingleSearchResults(indexConfig, query, groupArr);
241: }
242:
243: /**
244: * Extracts the file URL from a request path.
245: *
246: * @param requestPath The request path to extract the file URL from.
247: * @param encoding The encoding to use for the URL-docoding of the requestPath.
248: * @return The extracted file URL.
249: * @throws RegainException If extracting the file URL failed.
250: *
251: * @see net.sf.regain.search.sharedlib.hit.LinkTag
252: */
253: public static String extractFileUrl(String requestPath,
254: String encoding) throws RegainException {
255: // NOTE: This is the counterpart to net.sf.regain.search.sharedlib.hit.LinkTag
256: // NOTE: Removing index GET Parameter not nessesary: We already have the requestPath
257:
258: // Decode the URL
259: String decodedHref = RegainToolkit.urlDecode(requestPath,
260: encoding);
261:
262: // Cut off "http://domain/file/"
263: int filePos = decodedHref.indexOf("file/");
264: String fileName = decodedHref.substring(filePos + 5);
265:
266: // Restore the double slashes
267: // See workaround in net.sf.regain.search.sharedlib.hit.LinkTag
268: fileName = RegainToolkit.replace(fileName, new String[] {
269: "$/$", "$$" }, new String[] { "/", "$" });
270:
271: // Assemble the file URL
272: return RegainToolkit.fileNameToUrl(fileName);
273: }
274:
275: /**
276: * Decides whether the remote access to a file should be allowed.
277: * <p>
278: * The access is granted if the file is in the index. The access is never
279: * granted for indexes that have an access controller.
280: *
281: * @param request The request that holds the used index.
282: * @param fileUrl The URL to file to check.
283: * @return Whether the remote access to a file should be allowed.
284: * @throws RegainException If checking the file failed.
285: */
286: public static boolean allowFileAccess(PageRequest request,
287: String fileUrl) throws RegainException {
288: IndexConfig[] configArr = getIndexConfigArr(request);
289:
290: // Check whether one of the indexes contains the file
291: for (int i = 0; i < configArr.length; i++) {
292: // NOTE: We only allow the file access if there is no access controller
293: if (configArr[i].getSearchAccessController() == null) {
294: String dir = configArr[i].getDirectory();
295: IndexSearcherManager manager = IndexSearcherManager
296: .getInstance(dir);
297:
298: // Check whether the document is in the index
299: Term urlTerm = new Term("url", fileUrl);
300: Query query = new TermQuery(urlTerm);
301: Hits hits = manager.search(query);
302:
303: // Allow the access if we found the file in the index
304: if (hits.length() > 0) {
305: return true;
306: }
307: }
308: }
309:
310: // We didn't find the file in the indexes -> File access is not allowed
311: return false;
312: }
313:
314: /**
315: * Sends a file to the client.
316: *
317: * @param request The request.
318: * @param response The response.
319: * @param file The file to send.
320: * @throws RegainException If sending the file failed.
321: */
322: public static void sendFile(PageRequest request,
323: PageResponse response, File file) throws RegainException {
324: long lastModified = file.lastModified();
325: if (lastModified < request.getHeaderAsDate("If-Modified-Since")) {
326: // The browser can use the cached file
327: response.sendError(304);
328: } else {
329: response
330: .setHeaderAsDate("Date", System.currentTimeMillis());
331: response.setHeaderAsDate("Last-Modified", lastModified);
332:
333: // TODO: Make this configurable
334: if (mMimeTypeHash == null) {
335: // Source: http://de.selfhtml.org/diverses/mimetypen.htm
336: mMimeTypeHash = new HashMap();
337: mMimeTypeHash.put("html", "text/html");
338: mMimeTypeHash.put("htm", "text/html");
339: mMimeTypeHash.put("gif", "image/gif");
340: mMimeTypeHash.put("jpg", "image/jpeg");
341: mMimeTypeHash.put("jpeg", "image/jpeg");
342: mMimeTypeHash.put("png", "image/png");
343: mMimeTypeHash.put("js", "text/javascript");
344: mMimeTypeHash.put("txt", "text/plain");
345: mMimeTypeHash.put("pdf", "application/pdf");
346: mMimeTypeHash.put("xls", "application/msexcel");
347: mMimeTypeHash.put("doc", "application/msword");
348: mMimeTypeHash.put("ppt", "application/mspowerpoint");
349: mMimeTypeHash.put("rtf", "text/rtf");
350:
351: // Source: http://framework.openoffice.org/documentation/mimetypes/mimetypes.html
352: mMimeTypeHash.put("sds",
353: "application/vnd.stardivision.chart");
354: mMimeTypeHash.put("sdc",
355: "application/vnd.stardivision.calc");
356: mMimeTypeHash.put("sdw",
357: "application/vnd.stardivision.writer");
358: mMimeTypeHash.put("sgl",
359: "application/vnd.stardivision.writer-global");
360: mMimeTypeHash.put("sda",
361: "application/vnd.stardivision.draw");
362: mMimeTypeHash.put("sdd",
363: "application/vnd.stardivision.impress");
364: mMimeTypeHash.put("sdf",
365: "application/vnd.stardivision.math");
366: mMimeTypeHash.put("sxw",
367: "application/vnd.sun.xml.writer");
368: mMimeTypeHash.put("stw",
369: "application/vnd.sun.xml.writer.template");
370: mMimeTypeHash.put("sxg",
371: "application/vnd.sun.xml.writer.global");
372: mMimeTypeHash
373: .put("sxc", "application/vnd.sun.xml.calc");
374: mMimeTypeHash.put("stc",
375: "application/vnd.sun.xml.calc.template");
376: mMimeTypeHash.put("sxi",
377: "application/vnd.sun.xml.impress");
378: mMimeTypeHash.put("sti",
379: "application/vnd.sun.xml.impress.template");
380: mMimeTypeHash
381: .put("sxd", "application/vnd.sun.xml.draw");
382: mMimeTypeHash.put("std",
383: "application/vnd.sun.xml.draw.template");
384: mMimeTypeHash
385: .put("sxm", "application/vnd.sun.xml.math");
386: mMimeTypeHash.put("odt",
387: "application/vnd.oasis.opendocument.text");
388: mMimeTypeHash
389: .put("ott",
390: "application/vnd.oasis.opendocument.text-template");
391: mMimeTypeHash.put("oth",
392: "application/vnd.oasis.opendocument.text-web");
393: mMimeTypeHash
394: .put("odm",
395: "application/vnd.oasis.opendocument.text-master");
396: mMimeTypeHash.put("odg",
397: "application/vnd.oasis.opendocument.graphics");
398: mMimeTypeHash
399: .put("otg",
400: "application/vnd.oasis.opendocument.graphics-template");
401: mMimeTypeHash
402: .put("odp",
403: "application/vnd.oasis.opendocument.presentation");
404: mMimeTypeHash
405: .put("otp",
406: "application/vnd.oasis.opendocument.presentation-template");
407: mMimeTypeHash
408: .put("ods",
409: "application/vnd.oasis.opendocument.spreadsheet");
410: mMimeTypeHash
411: .put("ots",
412: "application/vnd.oasis.opendocument.spreadsheet-template");
413: mMimeTypeHash.put("odc",
414: "application/vnd.oasis.opendocument.chart");
415: mMimeTypeHash.put("odf",
416: "application/vnd.oasis.opendocument.formula");
417: mMimeTypeHash.put("odb",
418: "application/vnd.oasis.opendocument.database");
419: mMimeTypeHash.put("odi",
420: "application/vnd.oasis.opendocument.image");
421: }
422:
423: // Set the MIME type
424: String filename = file.getName();
425: int lastDot = filename.lastIndexOf('.');
426: if (lastDot != -1) {
427: String extension = filename.substring(lastDot + 1);
428: String mimeType = (String) mMimeTypeHash.get(extension);
429: if (mimeType != null) {
430: response.setHeader("Content-Type", mimeType);
431: }
432: }
433:
434: // Send the file
435: OutputStream out = null;
436: FileInputStream in = null;
437: try {
438: out = response.getOutputStream();
439: in = new FileInputStream(file);
440: RegainToolkit.pipe(in, out);
441: } catch (IOException exc) {
442: throw new RegainException("Sending file failed: "
443: + file.getAbsolutePath(), exc);
444: } finally {
445: if (in != null) {
446: try {
447: in.close();
448: } catch (IOException exc) {
449: }
450: }
451: if (out != null) {
452: try {
453: out.close();
454: } catch (IOException exc) {
455: }
456: }
457: }
458: }
459: }
460:
461: /**
462: * Loads the configuration of the search mask.
463: * <p>
464: * If the configuration is already loaded, nothing is done.
465: *
466: * @param request The page request. Used to get the "configFile" init
467: * parameter, which holds the name of the configuration file.
468: * @throws RegainException If loading failed.
469: */
470: private static void loadConfiguration(PageRequest request)
471: throws RegainException {
472: if (mConfig == null) {
473: // Create the factory
474: String factoryClassname = request
475: .getInitParameter("searchConfigFactoryClass");
476: String factoryJarfile = request
477: .getInitParameter("searchConfigFactoryJar");
478: if (factoryClassname == null) {
479: factoryClassname = DefaultSearchConfigFactory.class
480: .getName();
481: }
482: SearchConfigFactory factory = (SearchConfigFactory) RegainToolkit
483: .createClassInstance(factoryClassname,
484: SearchConfigFactory.class, factoryJarfile);
485:
486: // Create the config
487: mConfig = factory.createSearchConfig(request);
488: }
489: }
490:
491: }
|