package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;

import java.io.IOException;
import java.util.*;

/** An IndexReader which reads multiple, parallel indexes. Each index added
 * must have the same number of documents, but typically each contains
 * different fields. Each document contains the union of the fields of all
 * documents with the same document number. When searching, matches for a
 * query term are from the first index added that has the field.
 *
 * <p>This is useful, e.g., with collections that have large fields which
 * change rarely and small fields that change more frequently. The smaller
 * fields may be re-indexed in a new index and both indexes may be searched
 * together.
 *
 * <p><strong>Warning:</strong> It is up to you to make sure all indexes
 * are created and modified the same way. For example, if you add
 * documents to one index, you need to add the same documents in the
 * same order to the other indexes. <em>Failure to do so will result in
 * undefined behavior</em>.
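 *
 * <p>A minimal usage sketch (index locations and the choice of which reader
 * ignores stored fields are illustrative assumptions only):
 * <pre>
 *   ParallelReader pr = new ParallelReader();   // default: closes subreaders on close()
 *   pr.add(IndexReader.open("/path/to/large-static-index"));
 *   // stored fields are only needed from the first index, so ignore them here:
 *   pr.add(IndexReader.open("/path/to/small-changing-index"), true);
 *   IndexSearcher searcher = new IndexSearcher(pr);
 *   // ... search against the union of fields ...
 *   searcher.close();
 *   pr.close();   // also closes both subreaders
 * </pre>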
 */
public class ParallelReader extends IndexReader {
  private List readers = new ArrayList();
  private List decrefOnClose = new ArrayList(); // remember which subreaders to decRef on close
  boolean incRefReaders = false;
  private SortedMap fieldToReader = new TreeMap();
  private Map readerToFields = new HashMap();
  private List storedFieldReaders = new ArrayList();

  private int maxDoc;
  private int numDocs;
  private boolean hasDeletions;

  /** Construct a ParallelReader.
   * <p>Note that all subreaders are closed if this ParallelReader is closed.</p>
   */
  public ParallelReader() throws IOException {
    this(true);
  }

  /** Construct a ParallelReader.
   * @param closeSubReaders indicates whether the subreaders should be closed
   * when this ParallelReader is closed
   */
  public ParallelReader(boolean closeSubReaders) throws IOException {
    super();
    this.incRefReaders = !closeSubReaders;
  }

  /** Add an IndexReader.
   * @throws IOException if there is a low-level IO error
   */
  public void add(IndexReader reader) throws IOException {
    ensureOpen();
    add(reader, false);
  }

  /** Add an IndexReader whose stored fields will not be returned. This can
   * accelerate search when stored fields are only needed from a subset of
   * the IndexReaders.
   *
   * @throws IllegalArgumentException if not all indexes contain the same number
   * of documents
   * @throws IllegalArgumentException if not all indexes have the same value
   * of {@link IndexReader#maxDoc()}
   * @throws IOException if there is a low-level IO error
   */
  public void add(IndexReader reader, boolean ignoreStoredFields)
      throws IOException {

    ensureOpen();
    if (readers.size() == 0) {
      this.maxDoc = reader.maxDoc();
      this.numDocs = reader.numDocs();
      this.hasDeletions = reader.hasDeletions();
    }

    if (reader.maxDoc() != maxDoc)                // check compatibility
      throw new IllegalArgumentException(
          "All readers must have same maxDoc: " + maxDoc + "!=" + reader.maxDoc());
    if (reader.numDocs() != numDocs)
      throw new IllegalArgumentException(
          "All readers must have same numDocs: " + numDocs + "!=" + reader.numDocs());

    Collection fields = reader.getFieldNames(IndexReader.FieldOption.ALL);
    readerToFields.put(reader, fields);
    Iterator i = fields.iterator();
    while (i.hasNext()) {                         // update fieldToReader map
      String field = (String) i.next();
      if (fieldToReader.get(field) == null)
        fieldToReader.put(field, reader);
    }

    if (!ignoreStoredFields)
      storedFieldReaders.add(reader);             // add to storedFieldReaders
    readers.add(reader);

    if (incRefReaders) {
      reader.incRef();
    }
    decrefOnClose.add(Boolean.valueOf(incRefReaders));
  }

  /**
   * Tries to reopen the subreaders.
   * <br>
   * If one or more subreaders could be re-opened (i.e. subReader.reopen()
   * returned a new instance != subReader), then a new ParallelReader instance
   * is returned, otherwise this instance is returned.
   * <p>
   * A re-opened instance might share one or more subreaders with the old
   * instance. Index modification operations result in undefined behavior
   * when performed before the old instance is closed
   * (see {@link IndexReader#reopen()}).
   * <p>
   * If subreaders are shared, then the reference count of those
   * readers is increased to ensure that the subreaders remain open
   * until the last referring reader is closed.
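   * <p>
   * A minimal sketch of the intended calling pattern (variable names are
   * illustrative assumptions only):
   * <pre>
   *   IndexReader reopened = pr.reopen();
   *   if (reopened != pr) {          // at least one subreader changed
   *     pr.close();                  // close the old instance once it is no longer needed
   *     pr = (ParallelReader) reopened;
   *   }
   * </pre>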
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public IndexReader reopen() throws CorruptIndexException, IOException {
    ensureOpen();

    boolean reopened = false;
    List newReaders = new ArrayList();
    List newDecrefOnClose = new ArrayList();

    boolean success = false;

    try {

      for (int i = 0; i < readers.size(); i++) {
        IndexReader oldReader = (IndexReader) readers.get(i);
        IndexReader newReader = oldReader.reopen();
        newReaders.add(newReader);
        // if at least one of the subreaders was updated we remember that
        // and return a new ParallelReader
        if (newReader != oldReader) {
          reopened = true;
        }
      }

      if (reopened) {
        ParallelReader pr = new ParallelReader();
        for (int i = 0; i < readers.size(); i++) {
          IndexReader oldReader = (IndexReader) readers.get(i);
          IndexReader newReader = (IndexReader) newReaders.get(i);
          if (newReader == oldReader) {
            newDecrefOnClose.add(Boolean.TRUE);
            newReader.incRef();
          } else {
            // this is a new subreader instance, so on close() we don't
            // decRef but close it
            newDecrefOnClose.add(Boolean.FALSE);
          }
          pr.add(newReader, !storedFieldReaders.contains(oldReader));
        }
        pr.decrefOnClose = newDecrefOnClose;
        pr.incRefReaders = incRefReaders;
        success = true;
        return pr;
      } else {
        success = true;
        // No subreader was refreshed
        return this;
      }
    } finally {
      if (!success && reopened) {
        for (int i = 0; i < newReaders.size(); i++) {
          IndexReader r = (IndexReader) newReaders.get(i);
          if (r != null) {
            try {
              if (((Boolean) newDecrefOnClose.get(i)).booleanValue()) {
                r.decRef();
              } else {
                r.close();
              }
            } catch (IOException ignore) {
              // keep going - we want to clean up as much as possible
            }
          }
        }
      }
    }
  }

  public int numDocs() {
    // Don't call ensureOpen() here (it could affect performance)
    return numDocs;
  }

  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    return maxDoc;
  }

  public boolean hasDeletions() {
    // Don't call ensureOpen() here (it could affect performance)
    return hasDeletions;
  }

  // check first reader
  public boolean isDeleted(int n) {
    // Don't call ensureOpen() here (it could affect performance)
    if (readers.size() > 0)
      return ((IndexReader) readers.get(0)).isDeleted(n);
    return false;
  }

  // delete in all readers
  protected void doDelete(int n) throws CorruptIndexException, IOException {
    for (int i = 0; i < readers.size(); i++) {
      ((IndexReader) readers.get(i)).deleteDocument(n);
    }
    hasDeletions = true;
  }

  // undeleteAll in all readers
  protected void doUndeleteAll() throws CorruptIndexException, IOException {
    for (int i = 0; i < readers.size(); i++) {
      ((IndexReader) readers.get(i)).undeleteAll();
    }
    hasDeletions = false;
  }

  // append fields from storedFieldReaders
  public Document document(int n, FieldSelector fieldSelector)
      throws CorruptIndexException, IOException {
    ensureOpen();
    Document result = new Document();
    for (int i = 0; i < storedFieldReaders.size(); i++) {
      IndexReader reader = (IndexReader) storedFieldReaders.get(i);

      boolean include = (fieldSelector == null);
      if (!include) {
        Iterator it = ((Collection) readerToFields.get(reader)).iterator();
        while (it.hasNext())
          if (fieldSelector.accept((String) it.next()) != FieldSelectorResult.NO_LOAD) {
            include = true;
            break;
          }
      }
      if (include) {
        Iterator fieldIterator = reader.document(n, fieldSelector).getFields().iterator();
        while (fieldIterator.hasNext()) {
          result.add((Fieldable) fieldIterator.next());
        }
      }
    }
    return result;
  }

  // get all vectors
  public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
    ensureOpen();
    ArrayList results = new ArrayList();
    Iterator i = fieldToReader.entrySet().iterator();
    while (i.hasNext()) {
      Map.Entry e = (Map.Entry) i.next();
      String field = (String) e.getKey();
      IndexReader reader = (IndexReader) e.getValue();
      TermFreqVector vector = reader.getTermFreqVector(n, field);
      if (vector != null)
        results.add(vector);
    }
    return (TermFreqVector[]) results.toArray(new TermFreqVector[results.size()]);
  }

  public TermFreqVector getTermFreqVector(int n, String field) throws IOException {
    ensureOpen();
    IndexReader reader = ((IndexReader) fieldToReader.get(field));
    return reader == null ? null : reader.getTermFreqVector(n, field);
  }

  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper)
      throws IOException {
    ensureOpen();
    IndexReader reader = ((IndexReader) fieldToReader.get(field));
    if (reader != null) {
      reader.getTermFreqVector(docNumber, field, mapper);
    }
  }

  public void getTermFreqVector(int docNumber, TermVectorMapper mapper)
      throws IOException {
    ensureOpen();

    Iterator i = fieldToReader.entrySet().iterator();
    while (i.hasNext()) {
      Map.Entry e = (Map.Entry) i.next();
      String field = (String) e.getKey();
      IndexReader reader = (IndexReader) e.getValue();
      reader.getTermFreqVector(docNumber, field, mapper);
    }
  }

  public boolean hasNorms(String field) throws IOException {
    ensureOpen();
    IndexReader reader = ((IndexReader) fieldToReader.get(field));
    return reader == null ? false : reader.hasNorms(field);
  }

  public byte[] norms(String field) throws IOException {
    ensureOpen();
    IndexReader reader = ((IndexReader) fieldToReader.get(field));
    return reader == null ? null : reader.norms(field);
  }

  public void norms(String field, byte[] result, int offset) throws IOException {
    ensureOpen();
    IndexReader reader = ((IndexReader) fieldToReader.get(field));
    if (reader != null)
      reader.norms(field, result, offset);
  }

  protected void doSetNorm(int n, String field, byte value)
      throws CorruptIndexException, IOException {
    IndexReader reader = ((IndexReader) fieldToReader.get(field));
    if (reader != null)
      reader.doSetNorm(n, field, value);
  }

  public TermEnum terms() throws IOException {
    ensureOpen();
    return new ParallelTermEnum();
  }

  public TermEnum terms(Term term) throws IOException {
    ensureOpen();
    return new ParallelTermEnum(term);
  }

  public int docFreq(Term term) throws IOException {
    ensureOpen();
    IndexReader reader = ((IndexReader) fieldToReader.get(term.field()));
    return reader == null ? 0 : reader.docFreq(term);
  }

  public TermDocs termDocs(Term term) throws IOException {
    ensureOpen();
    return new ParallelTermDocs(term);
  }

  public TermDocs termDocs() throws IOException {
    ensureOpen();
    return new ParallelTermDocs();
  }

  public TermPositions termPositions(Term term) throws IOException {
    ensureOpen();
    return new ParallelTermPositions(term);
  }

  public TermPositions termPositions() throws IOException {
    ensureOpen();
    return new ParallelTermPositions();
  }

  /**
   * Checks recursively if all subreaders are up to date.
   */
  public boolean isCurrent() throws CorruptIndexException, IOException {
    for (int i = 0; i < readers.size(); i++) {
      if (!((IndexReader) readers.get(i)).isCurrent()) {
        return false;
      }
    }

    // all subreaders are up to date
    return true;
  }

  /**
   * Checks recursively if all subindexes are optimized.
   */
  public boolean isOptimized() {
    for (int i = 0; i < readers.size(); i++) {
      if (!((IndexReader) readers.get(i)).isOptimized()) {
        return false;
      }
    }

    // all subindexes are optimized
    return true;
  }

  /** Not implemented.
   * @throws UnsupportedOperationException
   */
  public long getVersion() {
    throw new UnsupportedOperationException("ParallelReader does not support this method.");
  }

  // for testing
  IndexReader[] getSubReaders() {
    return (IndexReader[]) readers.toArray(new IndexReader[readers.size()]);
  }

  protected void doCommit() throws IOException {
    for (int i = 0; i < readers.size(); i++)
      ((IndexReader) readers.get(i)).commit();
  }

  protected synchronized void doClose() throws IOException {
    for (int i = 0; i < readers.size(); i++) {
      if (((Boolean) decrefOnClose.get(i)).booleanValue()) {
        ((IndexReader) readers.get(i)).decRef();
      } else {
        ((IndexReader) readers.get(i)).close();
      }
    }
  }

  public Collection getFieldNames(IndexReader.FieldOption fieldNames) {
    ensureOpen();
    Set fieldSet = new HashSet();
    for (int i = 0; i < readers.size(); i++) {
      IndexReader reader = ((IndexReader) readers.get(i));
      Collection names = reader.getFieldNames(fieldNames);
      fieldSet.addAll(names);
    }
    return fieldSet;
  }

  private class ParallelTermEnum extends TermEnum {
    private String field;
    private Iterator fieldIterator;
    private TermEnum termEnum;

    public ParallelTermEnum() throws IOException {
      if (!fieldToReader.isEmpty()) {   // firstKey() would throw on an empty map
        field = (String) fieldToReader.firstKey();
        termEnum = ((IndexReader) fieldToReader.get(field)).terms();
      }
    }

    public ParallelTermEnum(Term term) throws IOException {
      field = term.field();
      IndexReader reader = ((IndexReader) fieldToReader.get(field));
      if (reader != null)
        termEnum = reader.terms(term);
    }

    public boolean next() throws IOException {
      if (termEnum == null)
        return false;

      // another term in this field?
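      // Note: field names are interned (Term interns its field name), so the
      // == identity comparison here and in the loop below is intentional.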
      if (termEnum.next() && termEnum.term().field() == field)
        return true;                              // yes, keep going

      termEnum.close();                           // close old termEnum

      // find the next field with terms, if any
      if (fieldIterator == null) {
        fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
        fieldIterator.next();                     // Skip field to get next one
      }
      while (fieldIterator.hasNext()) {
        field = (String) fieldIterator.next();
        termEnum = ((IndexReader) fieldToReader.get(field)).terms(new Term(field, ""));
        Term term = termEnum.term();
        if (term != null && term.field() == field)
          return true;
        else
          termEnum.close();
      }

      return false;                               // no more fields
    }

    public Term term() {
      if (termEnum == null)
        return null;

      return termEnum.term();
    }

    public int docFreq() {
      if (termEnum == null)
        return 0;

      return termEnum.docFreq();
    }

    public void close() throws IOException {
      if (termEnum != null)
        termEnum.close();
    }

  }

  // wrap a TermDocs in order to support seek(Term)
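  // seek(Term) selects the single subreader that owns the term's field; if no
  // subreader has that field, termDocs stays null and the enumeration is empty.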
  private class ParallelTermDocs implements TermDocs {
    protected TermDocs termDocs;

    public ParallelTermDocs() {
    }

    public ParallelTermDocs(Term term) throws IOException {
      seek(term);
    }

    public int doc() {
      return termDocs.doc();
    }

    public int freq() {
      return termDocs.freq();
    }

    public void seek(Term term) throws IOException {
      IndexReader reader = ((IndexReader) fieldToReader.get(term.field()));
      termDocs = reader != null ? reader.termDocs(term) : null;
    }

    public void seek(TermEnum termEnum) throws IOException {
      seek(termEnum.term());
    }

    public boolean next() throws IOException {
      if (termDocs == null)
        return false;

      return termDocs.next();
    }

    public int read(final int[] docs, final int[] freqs) throws IOException {
      if (termDocs == null)
        return 0;

      return termDocs.read(docs, freqs);
    }

    public boolean skipTo(int target) throws IOException {
      if (termDocs == null)
        return false;

      return termDocs.skipTo(target);
    }

    public void close() throws IOException {
      if (termDocs != null)
        termDocs.close();
    }

  }

  private class ParallelTermPositions extends ParallelTermDocs
      implements TermPositions {

    public ParallelTermPositions() {
    }

    public ParallelTermPositions(Term term) throws IOException {
      seek(term);
    }

    public void seek(Term term) throws IOException {
      IndexReader reader = ((IndexReader) fieldToReader.get(term.field()));
      termDocs = reader != null ? reader.termPositions(term) : null;
    }

    public int nextPosition() throws IOException {
      // It is an error to call this if there is no next position, e.g. if termDocs==null
      return ((TermPositions) termDocs).nextPosition();
    }

    public int getPayloadLength() {
      return ((TermPositions) termDocs).getPayloadLength();
    }

    public byte[] getPayload(byte[] data, int offset) throws IOException {
      return ((TermPositions) termDocs).getPayload(data, offset);
    }

    // TODO: Remove warning after API has been finalized
    public boolean isPayloadAvailable() {
      return ((TermPositions) termDocs).isPayloadAvailable();
    }
  }

}
|