package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;

import java.io.IOException;
import java.util.Collection;
import java.util.Hashtable;

import org.apache.lucene.index.MultiSegmentReader.MultiTermDocs;
import org.apache.lucene.index.MultiSegmentReader.MultiTermEnum;
import org.apache.lucene.index.MultiSegmentReader.MultiTermPositions;

/** An IndexReader which reads multiple indexes, appending their content.
 *
 * @version $Id: MultiReader.java 596004 2007-11-17 21:34:23Z buschmi $
 */
public class MultiReader extends IndexReader {
  protected IndexReader[] subReaders;
  private int[] starts;                    // 1st docno for each segment
  private boolean[] decrefOnClose;         // remember which subreaders to decRef on close
  private Hashtable normsCache = new Hashtable();
  private int maxDoc = 0;
  private int numDocs = -1;
  private boolean hasDeletions = false;

  /**
   * <p>Construct a MultiReader aggregating the named set of (sub)readers.
   * Directory locking for delete, undeleteAll, and setNorm operations is
   * left to the subreaders.</p>
   * <p>Note that all subreaders are closed if this MultiReader is closed.</p>
   * @param subReaders set of (sub)readers
   */
  public MultiReader(IndexReader[] subReaders) {
    initialize(subReaders, true);
  }

  /**
   * <p>Construct a MultiReader aggregating the named set of (sub)readers.
   * Directory locking for delete, undeleteAll, and setNorm operations is
   * left to the subreaders.</p>
   * @param subReaders set of (sub)readers
   * @param closeSubReaders indicates whether the subreaders should be closed
   * when this MultiReader is closed
   */
  public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) {
    initialize(subReaders, closeSubReaders);
  }
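
  /* Usage sketch (illustrative, not part of the original class): aggregating
   * two already-open readers without transferring ownership. The directories
   * dir1 and dir2 are assumptions made up for the example.
   *
   *   IndexReader r1 = IndexReader.open(dir1);
   *   IndexReader r2 = IndexReader.open(dir2);
   *   IndexReader multi = new MultiReader(new IndexReader[] {r1, r2}, false);
   *   try {
   *     // ... search or read documents through multi ...
   *   } finally {
   *     multi.close();  // only decRefs r1 and r2, since closeSubReaders == false
   *     r1.close();     // the caller still owns and closes the subreaders
   *     r2.close();
   *   }
   */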

  private void initialize(IndexReader[] subReaders, boolean closeSubReaders) {
    this.subReaders = subReaders;
    starts = new int[subReaders.length + 1];    // build starts array
    decrefOnClose = new boolean[subReaders.length];
    for (int i = 0; i < subReaders.length; i++) {
      starts[i] = maxDoc;
      maxDoc += subReaders[i].maxDoc();         // compute maxDocs

      if (!closeSubReaders) {
        subReaders[i].incRef();
        decrefOnClose[i] = true;
      } else {
        decrefOnClose[i] = false;
      }

      if (subReaders[i].hasDeletions())
        hasDeletions = true;
    }
    starts[subReaders.length] = maxDoc;
  }
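
  /* Worked example (illustrative): for two subreaders with maxDoc() of 5 and 3,
   * initialize() produces starts = {0, 5, 8} and maxDoc = 8. A global doc id n
   * is then served by subreader i = readerIndex(n) using the local id
   * n - starts[i]; e.g. global doc 6 maps to doc 1 of the second subreader.
   */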

  /**
   * Tries to reopen the subreaders.
   * <br>
   * If one or more subreaders could be re-opened (i.e. subReader.reopen()
   * returned a new instance != subReader), then a new MultiReader instance
   * is returned, otherwise this instance is returned.
   * <p>
   * A re-opened instance might share one or more subreaders with the old
   * instance. Index modification operations result in undefined behavior
   * when performed before the old instance is closed.
   * (see {@link IndexReader#reopen()}).
   * <p>
   * If subreaders are shared, then the reference count of those
   * readers is increased to ensure that the subreaders remain open
   * until the last referring reader is closed.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public IndexReader reopen() throws CorruptIndexException, IOException {
    ensureOpen();

    boolean reopened = false;
    IndexReader[] newSubReaders = new IndexReader[subReaders.length];
    boolean[] newDecrefOnClose = new boolean[subReaders.length];

    boolean success = false;
    try {
      for (int i = 0; i < subReaders.length; i++) {
        newSubReaders[i] = subReaders[i].reopen();
        // if at least one of the subreaders was updated we remember that
        // and return a new MultiReader
        if (newSubReaders[i] != subReaders[i]) {
          reopened = true;
          // this is a new subreader instance, so on close() we don't
          // decRef but close it
          newDecrefOnClose[i] = false;
        }
      }

      if (reopened) {
        for (int i = 0; i < subReaders.length; i++) {
          if (newSubReaders[i] == subReaders[i]) {
            newSubReaders[i].incRef();
            newDecrefOnClose[i] = true;
          }
        }

        MultiReader mr = new MultiReader(newSubReaders);
        mr.decrefOnClose = newDecrefOnClose;
        success = true;
        return mr;
      } else {
        success = true;
        return this;
      }
    } finally {
      if (!success && reopened) {
        for (int i = 0; i < newSubReaders.length; i++) {
          if (newSubReaders[i] != null) {
            try {
              if (newDecrefOnClose[i]) {
                newSubReaders[i].decRef();
              } else {
                newSubReaders[i].close();
              }
            } catch (IOException ignore) {
              // keep going - we want to clean up as much as possible
            }
          }
        }
      }
    }
  }
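
  /* Usage sketch (illustrative): refreshing a reader after the underlying
   * indexes have changed. The variable names are made up for the example; the
   * pattern of closing the old instance once reopen() returns a different one
   * follows the javadoc above.
   *
   *   IndexReader newMulti = multi.reopen();
   *   if (newMulti != multi) {
   *     multi.close();     // release the old instance; shared subreaders stay
   *     multi = newMulti;  // open because reopen() incRef'd them
   *   }
   */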

  public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
    ensureOpen();
    int i = readerIndex(n);        // find segment num
    return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
  }

  public TermFreqVector getTermFreqVector(int n, String field) throws IOException {
    ensureOpen();
    int i = readerIndex(n);        // find segment num
    return subReaders[i].getTermFreqVector(n - starts[i], field);
  }

  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
    ensureOpen();
    int i = readerIndex(docNumber);        // find segment num
    subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
  }

  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
    ensureOpen();
    int i = readerIndex(docNumber);        // find segment num
    subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
  }

  public boolean isOptimized() {
    return false;
  }

  public synchronized int numDocs() {
    // Don't call ensureOpen() here (it could affect performance)
    if (numDocs == -1) {                   // check cache
      int n = 0;                           // cache miss--recompute
      for (int i = 0; i < subReaders.length; i++)
        n += subReaders[i].numDocs();      // sum from readers
      numDocs = n;
    }
    return numDocs;
  }

  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    return maxDoc;
  }

  // inherit javadoc
  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
    ensureOpen();
    int i = readerIndex(n);                                       // find segment num
    return subReaders[i].document(n - starts[i], fieldSelector);  // dispatch to segment reader
  }

  public boolean isDeleted(int n) {
    // Don't call ensureOpen() here (it could affect performance)
    int i = readerIndex(n);                           // find segment num
    return subReaders[i].isDeleted(n - starts[i]);    // dispatch to segment reader
  }

  public boolean hasDeletions() {
    // Don't call ensureOpen() here (it could affect performance)
    return hasDeletions;
  }

  protected void doDelete(int n) throws CorruptIndexException, IOException {
    numDocs = -1;                                  // invalidate cache
    int i = readerIndex(n);                        // find segment num
    subReaders[i].deleteDocument(n - starts[i]);   // dispatch to segment reader
    hasDeletions = true;
  }

  protected void doUndeleteAll() throws CorruptIndexException, IOException {
    for (int i = 0; i < subReaders.length; i++)
      subReaders[i].undeleteAll();

    hasDeletions = false;
    numDocs = -1;                                  // invalidate cache
  }

  private int readerIndex(int n) {                 // find reader for doc n:
    return MultiSegmentReader.readerIndex(n, this.starts, this.subReaders.length);
  }

  public boolean hasNorms(String field) throws IOException {
    ensureOpen();
    for (int i = 0; i < subReaders.length; i++) {
      if (subReaders[i].hasNorms(field))
        return true;
    }
    return false;
  }

  private byte[] ones;

  private byte[] fakeNorms() {
    if (ones == null)
      ones = SegmentReader.createFakeNorms(maxDoc());
    return ones;
  }
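
  /* The two norms() methods below consult normsCache, which maps a field name
   * to a byte[maxDoc()] of norms assembled from all subreaders. A field that
   * has no norms falls back to fakeNorms(), a lazily created shared array of
   * default norm values; doSetNorm() evicts the cached entry for the field it
   * modifies so stale norms are never returned.
   */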

  public synchronized byte[] norms(String field) throws IOException {
    ensureOpen();
    byte[] bytes = (byte[]) normsCache.get(field);
    if (bytes != null)
      return bytes;                // cache hit
    if (!hasNorms(field))
      return fakeNorms();

    bytes = new byte[maxDoc()];
    for (int i = 0; i < subReaders.length; i++)
      subReaders[i].norms(field, bytes, starts[i]);
    normsCache.put(field, bytes);  // update cache
    return bytes;
  }

  public synchronized void norms(String field, byte[] result, int offset) throws IOException {
    ensureOpen();
    byte[] bytes = (byte[]) normsCache.get(field);
    if (bytes == null && !hasNorms(field))
      bytes = fakeNorms();
    if (bytes != null)             // cache hit
      System.arraycopy(bytes, 0, result, offset, maxDoc());

    for (int i = 0; i < subReaders.length; i++)      // read from segments
      subReaders[i].norms(field, result, offset + starts[i]);
  }

  protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException {
    normsCache.remove(field);                            // clear cache
    int i = readerIndex(n);                              // find segment num
    subReaders[i].setNorm(n - starts[i], field, value);  // dispatch
  }

  public TermEnum terms() throws IOException {
    ensureOpen();
    return new MultiTermEnum(subReaders, starts, null);
  }

  public TermEnum terms(Term term) throws IOException {
    ensureOpen();
    return new MultiTermEnum(subReaders, starts, term);
  }

  public int docFreq(Term t) throws IOException {
    ensureOpen();
    int total = 0;                 // sum freqs in segments
    for (int i = 0; i < subReaders.length; i++)
      total += subReaders[i].docFreq(t);
    return total;
  }

  public TermDocs termDocs() throws IOException {
    ensureOpen();
    return new MultiTermDocs(subReaders, starts);
  }

  public TermPositions termPositions() throws IOException {
    ensureOpen();
    return new MultiTermPositions(subReaders, starts);
  }

  protected void doCommit() throws IOException {
    for (int i = 0; i < subReaders.length; i++)
      subReaders[i].commit();
  }

  protected synchronized void doClose() throws IOException {
    for (int i = 0; i < subReaders.length; i++) {
      if (decrefOnClose[i]) {
        subReaders[i].decRef();
      } else {
        subReaders[i].close();
      }
    }
  }

  public Collection getFieldNames(IndexReader.FieldOption fieldNames) {
    ensureOpen();
    return MultiSegmentReader.getFieldNames(fieldNames, this.subReaders);
  }

  /**
   * Checks recursively if all subreaders are up to date.
   */
  public boolean isCurrent() throws CorruptIndexException, IOException {
    for (int i = 0; i < subReaders.length; i++) {
      if (!subReaders[i].isCurrent()) {
        return false;
      }
    }

    // all subreaders are up to date
    return true;
  }

  /** Not implemented.
   * @throws UnsupportedOperationException
   */
  public long getVersion() {
    throw new UnsupportedOperationException("MultiReader does not support this method.");
  }

  // for testing
  IndexReader[] getSubReaders() {
    return subReaders;
  }
}