001: /*
002: * Copyright 2003-2005 Michael Franken, Zilverline.
003: *
004: * The contents of this file, or the files included with this file, are subject to
005: * the current version of ZILVERLINE Collaborative Source License for the
006: * Zilverline Search Engine (the "License"); You may not use this file except in
007: * compliance with the License.
008: *
009: * You may obtain a copy of the License at
010: *
011: * http://www.zilverline.org.
012: *
013: * See the License for the rights, obligations and
014: * limitations governing use of the contents of the file.
015: *
016: * The Original and Upgraded Code is the Zilverline Search Engine. The developer of
017: * the Original and Upgraded Code is Michael Franken. Michael Franken owns the
018: * copyrights in the portions it created. All Rights Reserved.
019: *
020: */
021:
022: package org.zilverline.core;
023:
024: import java.io.File;
025:
026: import org.zilverline.service.CollectionManager;
027:
028: /**
029: * DocumentCollection is the base type for all Collections of Documents that are to be indexed by Lucene.
030: *
031: * @author Michael Franken
032: * @version $Revision: 1.8 $
033: */
034: public interface DocumentCollection {
035: /**
036: * Get the description of the collection.
037: *
038: * @return description for the collection
039: */
040: String getDescription();
041:
042: /**
043: * Determines the URL of the collection.
044: * <p>
045: * The URL maps the contentDir to another location, e.g. a document 'ldap.pdf' in contentDir 'e:\collection\books\' with a URL
046: * of 'http://search.company.com/books/' will be returned in a search result as
047: * <code>http://search.company.com/books/ldap.pdf</code>
048: * </p>
049: *
050: * @return the URL of the collection as a String, possibly null in the exceptional case where there is no contentDir
051: */
052: String getUrlDefault();
053:
054: /**
055: * Gets the directory where this collection's cache is stored. If the cacheDir is not set for this Collection, the name of this
056: * collection is used, possibly prepended with the (default) base directory retrieved from the manager (NOTE(review): the noun was
057: * missing in the original text; confirm). The cache is used to (temporarily) store expanded content, such as zip files.
058: *
059: * @return The directory where the cache of this collection is stored on disk.
060: */
061: File getCacheDirWithManagerDefaults();
062:
063: /**
064: * Get the number of documents in this collection. The number is not calculated, but stored after the indexing process, so it
065: * is a cheap operation.
066: *
067: * @return number of documents in collection
068: */
069: int getNumberOfDocs();
070:
071: /**
072: * Gets the cache URL of the collection. The URL maps the cacheDir to another location.
073: *
074: * <p>
075: * e.g. a document 'ldap.pdf' in cacheDir 'e:\collection\cache\books\' with a cacheURL of
076: * 'http://search.company.com/cachedBooks/' will be returned in a search result as
077: * <code>http://search.company.com/cachedBooks/ldap.pdf</code>
078: * </p>
079: *
080: * @return the cacheUrl of the collection, or the cacheDir as URL if url is null or empty.
081: */
082: String getCacheUrlWithManagerDefaults();
083:
084: /**
085: * 'Calculates' the directory where the index of this collection is stored on disk. If the indexDir is not set for this
086: * Collection, the name of this collection is used, possibly prepended with the baseDir retrieved from the manager.
087: *
088: * @return The directory where the index of this collection is stored on disk, never null
089: */
090: File getIndexDirWithManagerDefaults();
091:
092: /**
093: * Determines whether the cache containing the contents of archives should be kept after being indexed. It does so by
094: * retrieving the defaults from the manager if needed.
095: *
096: * @return true if the cache should be kept.
097: */
098: boolean isKeepCacheWithManagerDefaults();
099:
100: /**
101: * Check whether the index of this collection is valid. An index is valid when the directory exists and there is an index in it.
102: *
103: * @return true if the index is valid, otherwise false.
104: *
105: * @throws IndexException when the existing index of the Collection cannot be successfully opened.
106: */
107: boolean isIndexValid() throws IndexException;
108:
109: /**
110: * Get the id of the collection.
111: *
112: * @return unique id, can be null
113: */
114: Long getId();
115:
116: /**
117: * Get the name of this collection.
118: *
119: * @return name of collection
120: */
121: String getName();
122:
123: /**
124: * Indicates whether any indexing is going on.
125: *
126: * @return true if indexing is in progress.
127: */
128: boolean isIndexingInProgress();
129:
130: /**
131: * Index this Collection.
132: *
133: * @param fullIndex indicates whether a full or incremental index should be created
134: * @throws IndexException if the Collection cannot be indexed
135: */
136: void index(boolean fullIndex) throws IndexException;
137:
138: /**
139: * Index this Collection, presumably in a separate thread as the method name suggests (NOTE(review): confirm in implementations).
140: *
141: * @param fullIndex indicates whether a full or incremental index should be created
142: * @throws IndexException if the Collection cannot be indexed
143: */
144: void indexInThread(boolean fullIndex) throws IndexException;
145:
146: /**
147: * Set the id of the collection. The id is used by the collectionManager to add and retrieve collections.
148: *
149: * @param theId the Id
150: */
151: void setId(Long theId);
152:
153: /**
154: * Initialize this collection by getting its index. It retrieves the number of documents and the MD5 hash of all documents in
155: * the collection.
156: * <p>
157: * If the index does not exist (this is a new Collection) just return.
158: *
159: * @throws IndexException when the existing index of the Collection cannot be successfully opened.
160: */
161: void init() throws IndexException;
162:
163: /**
164: * Set the collectionManager.
165: *
166: * @param thisManager The CollectionManager holding this collection.
167: */
168: void setManager(CollectionManager thisManager);
169: }
|