001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.store.Directory;
021: import org.apache.lucene.store.IndexInput;
022: import org.apache.lucene.store.IndexOutput;
023:
024: import java.io.File;
025: import java.io.FileNotFoundException;
026: import java.io.IOException;
027: import java.io.PrintStream;
028: import java.util.Vector;
029:
030: final class SegmentInfos extends Vector {
031:
/**
 * The file format version, a negative number.
 * A negative marker works because the first int of an old-format file is
 * the segment name counter, which is always >= 0; read() tells the two
 * layouts apart by the sign of that first int.
 */
public static final int FORMAT = -1;

/** This format adds details used for lockless commits. It differs
 * slightly from the previous format in that file names
 * are never re-used (write once). Instead, each file is
 * written to the next generation. For example,
 * segments_1, segments_2, etc. This allows us to not use
 * a commit lock. See <a
 * href="http://lucene.apache.org/java/docs/fileformats.html">file
 * formats</a> for details.
 */
public static final int FORMAT_LOCKLESS = -2;

/** This format adds a "hasSingleNormFile" flag into each segment info.
 * See <a href="http://issues.apache.org/jira/browse/LUCENE-756">LUCENE-756</a>
 * for details.
 */
public static final int FORMAT_SINGLE_NORM_FILE = -3;

/** This format allows multiple segments to share a single
 * vectors and stored fields file. */
public static final int FORMAT_SHARED_DOC_STORE = -4;

/* This must always point to the most recent file format. It is the value
 * stamped into newly written segments files, and the lowest (newest)
 * format number the readers in this class accept. */
private static final int CURRENT_FORMAT = FORMAT_SHARED_DOC_STORE;

/** Used to name new segments; persisted in the segments file. */
public int counter = 0;

/**
 * Counts how often the index has been changed by adding or deleting docs.
 * Starting with the current time in milliseconds forces unique version
 * numbers to be created; the value is incremented on every write.
 */
private long version = System.currentTimeMillis();

// Generation of the "segments_N" for the next commit.
private long generation = 0;

// Generation of the "segments_N" file we last successfully read or wrote;
// this is normally the same as generation except if there was an
// IOException that had interrupted a commit.
private long lastGeneration = 0;

/**
 * If non-null, information about loading segments_N files
 * will be printed here.
 *
 * @see #setInfoStream
 */
private static PrintStream infoStream;
077:
078: public final SegmentInfo info(int i) {
079: return (SegmentInfo) elementAt(i);
080: }
081:
082: /**
083: * Get the generation (N) of the current segments_N file
084: * from a list of files.
085: *
086: * @param files -- array of file names to check
087: */
088: public static long getCurrentSegmentGeneration(String[] files) {
089: if (files == null) {
090: return -1;
091: }
092: long max = -1;
093: for (int i = 0; i < files.length; i++) {
094: String file = files[i];
095: if (file.startsWith(IndexFileNames.SEGMENTS)
096: && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
097: long gen = generationFromSegmentsFileName(file);
098: if (gen > max) {
099: max = gen;
100: }
101: }
102: }
103: return max;
104: }
105:
106: /**
107: * Get the generation (N) of the current segments_N file
108: * in the directory.
109: *
110: * @param directory -- directory to search for the latest segments_N file
111: */
112: public static long getCurrentSegmentGeneration(Directory directory)
113: throws IOException {
114: String[] files = directory.list();
115: if (files == null)
116: throw new IOException("cannot read directory " + directory
117: + ": list() returned null");
118: return getCurrentSegmentGeneration(files);
119: }
120:
121: /**
122: * Get the filename of the current segments_N file
123: * from a list of files.
124: *
125: * @param files -- array of file names to check
126: */
127:
128: public static String getCurrentSegmentFileName(String[] files)
129: throws IOException {
130: return IndexFileNames.fileNameFromGeneration(
131: IndexFileNames.SEGMENTS, "",
132: getCurrentSegmentGeneration(files));
133: }
134:
135: /**
136: * Get the filename of the current segments_N file
137: * in the directory.
138: *
139: * @param directory -- directory to search for the latest segments_N file
140: */
141: public static String getCurrentSegmentFileName(Directory directory)
142: throws IOException {
143: return IndexFileNames.fileNameFromGeneration(
144: IndexFileNames.SEGMENTS, "",
145: getCurrentSegmentGeneration(directory));
146: }
147:
148: /**
149: * Get the segments_N filename in use by this segment infos.
150: */
151: public String getCurrentSegmentFileName() {
152: return IndexFileNames.fileNameFromGeneration(
153: IndexFileNames.SEGMENTS, "", lastGeneration);
154: }
155:
156: /**
157: * Parse the generation off the segments file name and
158: * return it.
159: */
160: public static long generationFromSegmentsFileName(String fileName) {
161: if (fileName.equals(IndexFileNames.SEGMENTS)) {
162: return 0;
163: } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
164: return Long.parseLong(fileName
165: .substring(1 + IndexFileNames.SEGMENTS.length()),
166: Character.MAX_RADIX);
167: } else {
168: throw new IllegalArgumentException("fileName \"" + fileName
169: + "\" is not a segments file");
170: }
171: }
172:
173: /**
174: * Get the next segments_N filename that will be written.
175: */
176: public String getNextSegmentFileName() {
177: long nextGeneration;
178:
179: if (generation == -1) {
180: nextGeneration = 1;
181: } else {
182: nextGeneration = generation + 1;
183: }
184: return IndexFileNames.fileNameFromGeneration(
185: IndexFileNames.SEGMENTS, "", nextGeneration);
186: }
187:
188: /**
189: * Read a particular segmentFileName. Note that this may
190: * throw an IOException if a commit is in process.
191: *
192: * @param directory -- directory containing the segments file
193: * @param segmentFileName -- segment file to load
194: * @throws CorruptIndexException if the index is corrupt
195: * @throws IOException if there is a low-level IO error
196: */
197: public final void read(Directory directory, String segmentFileName)
198: throws CorruptIndexException, IOException {
199: boolean success = false;
200:
201: // Clear any previous segments:
202: clear();
203:
204: IndexInput input = directory.openInput(segmentFileName);
205:
206: generation = generationFromSegmentsFileName(segmentFileName);
207:
208: lastGeneration = generation;
209:
210: try {
211: int format = input.readInt();
212: if (format < 0) { // file contains explicit format info
213: // check that it is a format we can understand
214: if (format < CURRENT_FORMAT)
215: throw new CorruptIndexException(
216: "Unknown format version: " + format);
217: version = input.readLong(); // read version
218: counter = input.readInt(); // read counter
219: } else { // file is in old format without explicit format info
220: counter = format;
221: }
222:
223: for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
224: addElement(new SegmentInfo(directory, format, input));
225: }
226:
227: if (format >= 0) { // in old format the version number may be at the end of the file
228: if (input.getFilePointer() >= input.length())
229: version = System.currentTimeMillis(); // old file format without version number
230: else
231: version = input.readLong(); // read version
232: }
233: success = true;
234: } finally {
235: input.close();
236: if (!success) {
237: // Clear any segment infos we had loaded so we
238: // have a clean slate on retry:
239: clear();
240: }
241: }
242: }
243:
244: /**
245: * This version of read uses the retry logic (for lock-less
246: * commits) to find the right segments file to load.
247: * @throws CorruptIndexException if the index is corrupt
248: * @throws IOException if there is a low-level IO error
249: */
250: public final void read(Directory directory)
251: throws CorruptIndexException, IOException {
252:
253: generation = lastGeneration = -1;
254:
255: new FindSegmentsFile(directory) {
256:
257: protected Object doBody(String segmentFileName)
258: throws CorruptIndexException, IOException {
259: read(directory, segmentFileName);
260: return null;
261: }
262: }.run();
263: }
264:
265: public final void write(Directory directory) throws IOException {
266:
267: String segmentFileName = getNextSegmentFileName();
268:
269: // Always advance the generation on write:
270: if (generation == -1) {
271: generation = 1;
272: } else {
273: generation++;
274: }
275:
276: IndexOutput output = directory.createOutput(segmentFileName);
277:
278: boolean success = false;
279:
280: try {
281: output.writeInt(CURRENT_FORMAT); // write FORMAT
282: output.writeLong(++version); // every write changes
283: // the index
284: output.writeInt(counter); // write counter
285: output.writeInt(size()); // write infos
286: for (int i = 0; i < size(); i++) {
287: info(i).write(output);
288: }
289: } finally {
290: try {
291: output.close();
292: success = true;
293: } finally {
294: if (!success) {
295: // Try not to leave a truncated segments_N file in
296: // the index:
297: directory.deleteFile(segmentFileName);
298: }
299: }
300: }
301:
302: try {
303: output = directory
304: .createOutput(IndexFileNames.SEGMENTS_GEN);
305: try {
306: output.writeInt(FORMAT_LOCKLESS);
307: output.writeLong(generation);
308: output.writeLong(generation);
309: } finally {
310: output.close();
311: }
312: } catch (IOException e) {
313: // It's OK if we fail to write this file since it's
314: // used only as one of the retry fallbacks.
315: }
316:
317: lastGeneration = generation;
318: }
319:
320: /**
321: * Returns a copy of this instance, also copying each
322: * SegmentInfo.
323: */
324:
325: public Object clone() {
326: SegmentInfos sis = (SegmentInfos) super .clone();
327: for (int i = 0; i < sis.size(); i++) {
328: sis.setElementAt(((SegmentInfo) sis.elementAt(i)).clone(),
329: i);
330: }
331: return sis;
332: }
333:
334: /**
335: * version number when this SegmentInfos was generated.
336: */
337: public long getVersion() {
338: return version;
339: }
340:
341: public long getGeneration() {
342: return generation;
343: }
344:
345: public long getLastGeneration() {
346: return lastGeneration;
347: }
348:
349: /**
350: * Current version number from segments file.
351: * @throws CorruptIndexException if the index is corrupt
352: * @throws IOException if there is a low-level IO error
353: */
354: public static long readCurrentVersion(Directory directory)
355: throws CorruptIndexException, IOException {
356:
357: return ((Long) new FindSegmentsFile(directory) {
358: protected Object doBody(String segmentFileName)
359: throws CorruptIndexException, IOException {
360:
361: IndexInput input = directory.openInput(segmentFileName);
362:
363: int format = 0;
364: long version = 0;
365: try {
366: format = input.readInt();
367: if (format < 0) {
368: if (format < CURRENT_FORMAT)
369: throw new CorruptIndexException(
370: "Unknown format version: " + format);
371: version = input.readLong(); // read version
372: }
373: } finally {
374: input.close();
375: }
376:
377: if (format < 0)
378: return new Long(version);
379:
380: // We cannot be sure about the format of the file.
381: // Therefore we have to read the whole file and cannot simply seek to the version entry.
382: SegmentInfos sis = new SegmentInfos();
383: sis.read(directory, segmentFileName);
384: return new Long(sis.getVersion());
385: }
386: }.run()).longValue();
387: }
388:
389: /** If non-null, information about retries when loading
390: * the segments file will be printed to this.
391: */
392: public static void setInfoStream(PrintStream infoStream) {
393: SegmentInfos.infoStream = infoStream;
394: }
395:
/* Advanced configuration of retry logic in loading
 segments_N file. These are process-wide settings read by
 FindSegmentsFile.run(). */

// Maximum number of attempts to read a consistent segments.gen file.
private static int defaultGenFileRetryCount = 10;
// Pause, in milliseconds, after each unsuccessful segments.gen attempt.
private static int defaultGenFileRetryPauseMsec = 50;
// Maximum number of times the generation is advanced by look-ahead.
private static int defaultGenLookaheadCount = 10;
401:
402: /**
403: * Advanced: set how many times to try loading the
404: * segments.gen file contents to determine current segment
405: * generation. This file is only referenced when the
406: * primary method (listing the directory) fails.
407: */
408: public static void setDefaultGenFileRetryCount(int count) {
409: defaultGenFileRetryCount = count;
410: }
411:
412: /**
413: * @see #setDefaultGenFileRetryCount
414: */
415: public static int getDefaultGenFileRetryCount() {
416: return defaultGenFileRetryCount;
417: }
418:
419: /**
420: * Advanced: set how many milliseconds to pause in between
421: * attempts to load the segments.gen file.
422: */
423: public static void setDefaultGenFileRetryPauseMsec(int msec) {
424: defaultGenFileRetryPauseMsec = msec;
425: }
426:
427: /**
428: * @see #setDefaultGenFileRetryPauseMsec
429: */
430: public static int getDefaultGenFileRetryPauseMsec() {
431: return defaultGenFileRetryPauseMsec;
432: }
433:
434: /**
435: * Advanced: set how many times to try incrementing the
436: * gen when loading the segments file. This only runs if
437: * the primary (listing directory) and secondary (opening
438: * segments.gen file) methods fail to find the segments
439: * file.
440: */
441: public static void setDefaultGenLookaheadCount(int count) {
442: defaultGenLookaheadCount = count;
443: }
444:
445: /**
446: * @see #setDefaultGenLookaheadCount
447: */
448: public static int getDefaultGenLookahedCount() {
449: return defaultGenLookaheadCount;
450: }
451:
452: /**
453: * @see #setInfoStream
454: */
455: public static PrintStream getInfoStream() {
456: return infoStream;
457: }
458:
459: private static void message(String message) {
460: if (infoStream != null) {
461: infoStream.println("SIS ["
462: + Thread.currentThread().getName() + "]: "
463: + message);
464: }
465: }
466:
467: /**
468: * Utility class for executing code that needs to do
469: * something with the current segments file. This is
470: * necessary with lock-less commits because from the time
471: * you locate the current segments file name, until you
472: * actually open it, read its contents, or check modified
473: * time, etc., it could have been deleted due to a writer
474: * commit finishing.
475: */
476: public abstract static class FindSegmentsFile {
477:
478: File fileDirectory;
479: Directory directory;
480:
481: public FindSegmentsFile(File directory) {
482: this .fileDirectory = directory;
483: }
484:
485: public FindSegmentsFile(Directory directory) {
486: this .directory = directory;
487: }
488:
489: public Object run() throws CorruptIndexException, IOException {
490: String segmentFileName = null;
491: long lastGen = -1;
492: long gen = 0;
493: int genLookaheadCount = 0;
494: IOException exc = null;
495: boolean retry = false;
496:
497: int method = 0;
498:
499: // Loop until we succeed in calling doBody() without
500: // hitting an IOException. An IOException most likely
501: // means a commit was in process and has finished, in
502: // the time it took us to load the now-old infos files
503: // (and segments files). It's also possible it's a
504: // true error (corrupt index). To distinguish these,
505: // on each retry we must see "forward progress" on
506: // which generation we are trying to load. If we
507: // don't, then the original error is real and we throw
508: // it.
509:
510: // We have three methods for determining the current
511: // generation. We try the first two in parallel, and
512: // fall back to the third when necessary.
513:
514: while (true) {
515:
516: if (0 == method) {
517:
518: // Method 1: list the directory and use the highest
519: // segments_N file. This method works well as long
520: // as there is no stale caching on the directory
521: // contents (NOTE: NFS clients often have such stale
522: // caching):
523: String[] files = null;
524:
525: long genA = -1;
526:
527: if (directory != null)
528: files = directory.list();
529: else
530: files = fileDirectory.list();
531:
532: if (files != null)
533: genA = getCurrentSegmentGeneration(files);
534:
535: message("directory listing genA=" + genA);
536:
537: // Method 2: open segments.gen and read its
538: // contents. Then we take the larger of the two
539: // gen's. This way, if either approach is hitting
540: // a stale cache (NFS) we have a better chance of
541: // getting the right generation.
542: long genB = -1;
543: if (directory != null) {
544: for (int i = 0; i < defaultGenFileRetryCount; i++) {
545: IndexInput genInput = null;
546: try {
547: genInput = directory
548: .openInput(IndexFileNames.SEGMENTS_GEN);
549: } catch (FileNotFoundException e) {
550: message("segments.gen open: FileNotFoundException "
551: + e);
552: break;
553: } catch (IOException e) {
554: message("segments.gen open: IOException "
555: + e);
556: }
557:
558: if (genInput != null) {
559: try {
560: int version = genInput.readInt();
561: if (version == FORMAT_LOCKLESS) {
562: long gen0 = genInput.readLong();
563: long gen1 = genInput.readLong();
564: message("fallback check: "
565: + gen0 + "; " + gen1);
566: if (gen0 == gen1) {
567: // The file is consistent.
568: genB = gen0;
569: break;
570: }
571: }
572: } catch (IOException err2) {
573: // will retry
574: } finally {
575: genInput.close();
576: }
577: }
578: try {
579: Thread
580: .sleep(defaultGenFileRetryPauseMsec);
581: } catch (InterruptedException e) {
582: // will retry
583: }
584: }
585: }
586:
587: message(IndexFileNames.SEGMENTS_GEN
588: + " check: genB=" + genB);
589:
590: // Pick the larger of the two gen's:
591: if (genA > genB)
592: gen = genA;
593: else
594: gen = genB;
595:
596: if (gen == -1) {
597: // Neither approach found a generation
598: String s;
599: if (files != null) {
600: s = "";
601: for (int i = 0; i < files.length; i++)
602: s += " " + files[i];
603: } else
604: s = " null";
605: throw new FileNotFoundException(
606: "no segments* file found in "
607: + directory + ": files:" + s);
608: }
609: }
610:
611: // Third method (fallback if first & second methods
612: // are not reliable): since both directory cache and
613: // file contents cache seem to be stale, just
614: // advance the generation.
615: if (1 == method
616: || (0 == method && lastGen == gen && retry)) {
617:
618: method = 1;
619:
620: if (genLookaheadCount < defaultGenLookaheadCount) {
621: gen++;
622: genLookaheadCount++;
623: message("look ahead increment gen to " + gen);
624: }
625: }
626:
627: if (lastGen == gen) {
628:
629: // This means we're about to try the same
630: // segments_N last tried. This is allowed,
631: // exactly once, because writer could have been in
632: // the process of writing segments_N last time.
633:
634: if (retry) {
635: // OK, we've tried the same segments_N file
636: // twice in a row, so this must be a real
637: // error. We throw the original exception we
638: // got.
639: throw exc;
640: } else {
641: retry = true;
642: }
643:
644: } else {
645: // Segment file has advanced since our last loop, so
646: // reset retry:
647: retry = false;
648: }
649:
650: lastGen = gen;
651:
652: segmentFileName = IndexFileNames
653: .fileNameFromGeneration(
654: IndexFileNames.SEGMENTS, "", gen);
655:
656: try {
657: Object v = doBody(segmentFileName);
658: if (exc != null) {
659: message("success on " + segmentFileName);
660: }
661: return v;
662: } catch (IOException err) {
663:
664: // Save the original root cause:
665: if (exc == null) {
666: exc = err;
667: }
668:
669: message("primary Exception on '" + segmentFileName
670: + "': " + err + "'; will retry: retry="
671: + retry + "; gen = " + gen);
672:
673: if (!retry && gen > 1) {
674:
675: // This is our first time trying this segments
676: // file (because retry is false), and, there is
677: // possibly a segments_(N-1) (because gen > 1).
678: // So, check if the segments_(N-1) exists and
679: // try it if so:
680: String prevSegmentFileName = IndexFileNames
681: .fileNameFromGeneration(
682: IndexFileNames.SEGMENTS, "",
683: gen - 1);
684:
685: final boolean prevExists;
686: if (directory != null)
687: prevExists = directory
688: .fileExists(prevSegmentFileName);
689: else
690: prevExists = new File(fileDirectory,
691: prevSegmentFileName).exists();
692:
693: if (prevExists) {
694: message("fallback to prior segment file '"
695: + prevSegmentFileName + "'");
696: try {
697: Object v = doBody(prevSegmentFileName);
698: if (exc != null) {
699: message("success on fallback "
700: + prevSegmentFileName);
701: }
702: return v;
703: } catch (IOException err2) {
704: message("secondary Exception on '"
705: + prevSegmentFileName + "': "
706: + err2 + "'; will retry");
707: }
708: }
709: }
710: }
711: }
712: }
713:
714: /**
715: * Subclass must implement this. The assumption is an
716: * IOException will be thrown if something goes wrong
717: * during the processing that could have been caused by
718: * a writer committing.
719: */
720: protected abstract Object doBody(String segmentFileName)
721: throws CorruptIndexException, IOException;
722: }
723:
724: /**
725: * Returns a new SegmentInfos containg the SegmentInfo
726: * instances in the specified range first (inclusive) to
727: * last (exclusive), so total number of segments returned
728: * is last-first.
729: */
730: public SegmentInfos range(int first, int last) {
731: SegmentInfos infos = new SegmentInfos();
732: infos.addAll(super.subList(first, last));
733: return infos;
734: }
735: }
|