001: /*
002: * Copyright 2003-2004 Michael Franken, Zilverline.
003: *
004: * The contents of this file, or the files included with this file, are subject to
005: * the current version of ZILVERLINE Collaborative Source License for the
006: * Zilverline Search Engine (the "License"); You may not use this file except in
007: * compliance with the License.
008: *
009: * You may obtain a copy of the License at
010: *
011: * http://www.zilverline.org.
012: *
013: * See the License for the rights, obligations and
014: * limitations governing use of the contents of the file.
015: *
016: * The Original and Upgraded Code is the Zilverline Search Engine. The developer of
017: * the Original and Upgraded Code is Michael Franken. Michael Franken owns the
018: * copyrights in the portions it created. All Rights Reserved.
019: *
020: */
021:
022: package org.zilverline.service;
023:
024: import java.io.File;
025: import java.util.List;
026:
027: import org.apache.lucene.analysis.Analyzer;
028:
029: import org.zilverline.core.DocumentCollection;
030: import org.zilverline.core.ExtractorFactory;
031: import org.zilverline.core.FileSystemCollection;
032: import org.zilverline.core.Handler;
033: import org.zilverline.core.IndexException;
034:
035: /**
036: * The CollectionManager holds all collections, and base values for them.
037: *
038: * @author Michael Franken
039: * @version $Revision: 1.16 $
040: *
041: * @since 18 september 2004
042: */
043: public interface CollectionManager {
044: /**
045: * @return Returns the analyzer.
046: */
047: String getAnalyzer();
048:
049: /**
050: * Deletes collection from list of collections.
051: *
052: * @param col Collection containing documents
053: *
054: */
055: void deleteCollection(DocumentCollection col);
056:
057: /**
058: * @return Returns the allAnalyzers.
059: */
060: String[] getAllAnalyzers();
061:
062: /**
063: * @return Returns the allExtractors.
064: */
065: String[] getAllExtractors();
066:
067: /**
068: * Add collection to list of collections.
069: *
070: * @param col Collection containing documents
071: */
072: void addCollection(final DocumentCollection col);
073:
074: /**
075: * Get the cache base directory.
076: *
077: * @return String the directory where the cache sits
078: */
079: File getCacheBaseDir();
080:
081: /**
082: * Gets a collection by id.
083: *
084: * @param theId The id of the collection
085: *
086: * @return Collection or null if not found
087: */
088: DocumentCollection getCollection(final Long theId);
089:
090: /**
091: * Gets a collection by name.
092: *
093: * @param theName The name of the collection
094: *
095: * @return Collection or null if not found
096: */
097: DocumentCollection getCollectionByName(final String theName);
098:
099: /**
100: * Get all collections.
101: *
102: * @return collections List of collections
103: */
104: List getCollections();
105:
106: /**
107: * Get the base directory for the index.
108: *
109: * @return the directory
110: */
111: File getIndexBaseDir();
112:
113: /**
114: * Initializes all collections.
115: *
116: * @throws IndexException if the Collection can not be initialized or retrieved from store.
117: */
118: void init() throws IndexException;
119:
120: /**
121: * The default for all collections whether to keep cache dir after indexing.
122: *
123: * @return whether to keep the cache or not.
124: */
125: boolean isKeepCache();
126:
127: /**
128: * The default cache base directory for all collections. The cache is the directory on disk where zipped content is unzipped for
129: * indexing.
130: *
131: * @param thisDir the directory on disk
132: */
133: void setCacheBaseDir(final File this Dir);
134:
135: /**
136: * The default index base directory for all collections. The index is the directory on disk where a Lucene index is stored.
137: *
138: * @param thisDir the directory on disk
139: *
140: * @see org.apache.lucene.index.IndexReader
141: */
142: void setIndexBaseDir(final File this Dir);
143:
144: /**
145: * Indicates whether a Collection cache should be kept after indexing. The value of this CollectionManagerImpl functions as
146: * default for all Collections.
147: *
148: * @param b keep cache or not.
149: */
150: void setKeepCache(final boolean b);
151:
152: /**
153: * Indicates whether any indexing is going on.
154: *
155: * @return true if so.
156: */
157: boolean isIndexingInProgress();
158:
159: /**
160: * Returns an Analyzer for this collection based on configuration.
161: *
162: * @return the Analyzer used to index and search this collection
163: */
164: Analyzer createAnalyzer();
165:
166: /**
167: * Create an Analyzer as specified by the given String.
168: *
169: * @param analyzerClassName the name of the class. The class needs to be available on the classpath.
170: */
171: void setAnalyzer(final String analyzerClassName);
172:
173: /**
174: * Store the CollectionManager to store.
175: *
176: * @throws IndexException when collectionManager can not be saved to underlying store
177: */
178: void store() throws IndexException;
179:
180: /**
181: * get the ArchiveHandler, which contains the mappings for unArchiving archives.
182: *
183: * @return object containing mappings for handling archives
184: */
185: Handler getArchiveHandler();
186:
187: /**
188: * @return Returns the factory.
189: */
190: ExtractorFactory getFactory();
191:
192: /**
193: * Set the ArchiveHandler.
194: *
195: * @param handler object containing mappings for handling archives
196: */
197: void setArchiveHandler(final Handler handler);
198:
199: /**
200: * @param thatFactory The factory to set.
201: */
202: void setFactory(final ExtractorFactory thatFactory);
203:
204: /**
205: * @return Returns the mergeFactor.
206: */
207: Integer getMergeFactor();
208:
209: /**
210: * @param mergeFactor The mergeFactor to set.
211: */
212: void setMergeFactor(Integer mergeFactor);
213:
214: /**
215: * @return Returns the priority.
216: */
217: Integer getPriority();
218:
219: /**
220: * @param priority The priority to set.
221: */
222: void setPriority(Integer priority);
223:
224: /**
225: * @return Returns the maxMergeDocs.
226: */
227: Integer getMaxMergeDocs();
228:
229: /**
230: * @param maxMergeDocs The maxMergeDocs to set.
231: */
232: void setMaxMergeDocs(Integer maxMergeDocs);
233:
234: /**
235: * @return Returns the minMergeDocs.
236: */
237: Integer getMinMergeDocs();
238:
239: /**
240: * @param minMergeDocs The minMergeDocs to set.
241: */
242: void setMinMergeDocs(Integer minMergeDocs);
243:
244: /**
245: * Expands Archive to disk. This is used is 'on-the-fly' extraction from cache
246: *
247: * @param col the Collection to which cache this archive is extracted
248: * @param zip the archive
249: *
250: * @return true if archive could be extracted
251: *
252: * @throws IndexException
253: *
254: * @see org.zilverline.web.CacheController
255: */
256: boolean expandArchive(final FileSystemCollection col, final File zip)
257: throws IndexException;
258:
259: /**
260: * 'unpacks' a given archive file into cache directory with derived name. e.g. c:\temp\file.chm wil be unpacked into
261: * [cacheDir]\file_chm\.
262: *
263: * @param sourceFile the Archive file to be unpacked
264: * @param thisCollection the collection whose cache and contenDir is used
265: *
266: * @return File (new) directory containing unpacked file, null if unknown Archive
267: */
268: File unPack(final File sourceFile,
269: final FileSystemCollection thisCollection);
270: }
|