package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;

final class SegmentInfo {

  static final int NO = -1;          // e.g. no norms; no deletes;
  static final int YES = 1;          // e.g. have norms; have deletes;
  static final int CHECK_DIR = 0;    // e.g. must check dir to see if there are norms/deletions
  static final int WITHOUT_GEN = 0;  // a file name that has no GEN in it.
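
  // These sentinels are shared by delGen, normGen[] and isCompoundFile below:
  // NO means "definitely absent", YES (or any value >= YES) means "definitely
  // present", and CHECK_DIR means "unknown, probe the directory" (pre-2.1
  // segments).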

  public String name;     // unique name in dir
  public int docCount;    // number of docs in seg
  public Directory dir;   // where segment resides

  private boolean preLockless;  // true if this is a segments file written before
                                // lock-less commits (2.1)

  private long delGen;          // current generation of del file; NO if there
                                // are no deletes; CHECK_DIR if it's a pre-2.1 segment
                                // (and we must check filesystem); YES or higher if
                                // there are deletes at generation N

  private long[] normGen;       // current generation of each field's norm file.
                                // If this array is null, for lockLess this means no
                                // separate norms. For preLockLess this means we must
                                // check filesystem. If this array is not null, its
                                // values mean: NO says this field has no separate
                                // norms; CHECK_DIR says it is a preLockLess segment and
                                // filesystem must be checked; >= YES says this field
                                // has separate norms with the specified generation

  private byte isCompoundFile;  // NO if it is not; YES if it is; CHECK_DIR if it's
                                // pre-2.1 (ie, must check file system to see
                                // if <name>.cfs and <name>.nrm exist)

  private boolean hasSingleNormFile;  // true if this segment maintains norms in a single file;
                                      // false otherwise.
                                      // This is currently false for segments populated by DocumentWriter
                                      // and true for newly created merged segments (both
                                      // compound and non compound).

  private List files;           // cached list of files that this segment uses
                                // in the Directory

  long sizeInBytes = -1;        // total byte size of all of our files (computed on demand)

  private int docStoreOffset;   // if this segment shares stored fields & vectors, this
                                // offset is where in that file this segment's docs begin
  private String docStoreSegment;          // name used to derive fields/vectors file we share with
                                           // other segments
  private boolean docStoreIsCompoundFile;  // whether doc store files are stored in compound file (*.cfx)

  public SegmentInfo(String name, int docCount, Directory dir) {
    this.name = name;
    this.docCount = docCount;
    this.dir = dir;
    delGen = NO;
    isCompoundFile = CHECK_DIR;
    preLockless = true;
    hasSingleNormFile = false;
    docStoreOffset = -1;
    docStoreSegment = name;
    docStoreIsCompoundFile = false;
  }

  public SegmentInfo(String name, int docCount, Directory dir,
                     boolean isCompoundFile, boolean hasSingleNormFile) {
    this(name, docCount, dir, isCompoundFile, hasSingleNormFile,
         -1, null, false);
  }

  public SegmentInfo(String name, int docCount, Directory dir,
                     boolean isCompoundFile, boolean hasSingleNormFile,
                     int docStoreOffset, String docStoreSegment,
                     boolean docStoreIsCompoundFile) {
    this(name, docCount, dir);
    this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
    this.hasSingleNormFile = hasSingleNormFile;
    preLockless = false;
    this.docStoreOffset = docStoreOffset;
    this.docStoreSegment = docStoreSegment;
    this.docStoreIsCompoundFile = docStoreIsCompoundFile;
    assert docStoreOffset == -1 || docStoreSegment != null;
  }

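  // A minimal construction sketch (values are hypothetical): a merged segment
  // "_3" holding 1000 docs, written as a compound file with a single norms
  // file and no shared doc store:
  //
  //   SegmentInfo si = new SegmentInfo("_3", 1000, dir, true, true, -1, null, false);
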
  /**
   * Copy everything from src SegmentInfo into our instance.
   */
  void reset(SegmentInfo src) {
    clearFiles();
    name = src.name;
    docCount = src.docCount;
    dir = src.dir;
    preLockless = src.preLockless;
    delGen = src.delGen;
    docStoreOffset = src.docStoreOffset;
    docStoreIsCompoundFile = src.docStoreIsCompoundFile;
    if (src.normGen == null) {
      normGen = null;
    } else {
      normGen = new long[src.normGen.length];
      System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
    }
    isCompoundFile = src.isCompoundFile;
    hasSingleNormFile = src.hasSingleNormFile;
  }

  /**
   * Construct a new SegmentInfo instance by reading a
   * previously saved SegmentInfo from input.
   *
   * @param dir directory to load from
   * @param format format of the segments info file
   * @param input input handle to read segment info from
   */
  SegmentInfo(Directory dir, int format, IndexInput input) throws IOException {
    this.dir = dir;
    name = input.readString();
    docCount = input.readInt();
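    // Note: the format codes defined in SegmentInfos are negative and grow
    // more negative as the file format evolves, so "format <= FORMAT_X"
    // reads as "written by FORMAT_X or a newer format".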
    if (format <= SegmentInfos.FORMAT_LOCKLESS) {
      delGen = input.readLong();
      if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) {
        docStoreOffset = input.readInt();
        if (docStoreOffset != -1) {
          docStoreSegment = input.readString();
          docStoreIsCompoundFile = (1 == input.readByte());
        } else {
          docStoreSegment = name;
          docStoreIsCompoundFile = false;
        }
      } else {
        docStoreOffset = -1;
        docStoreSegment = name;
        docStoreIsCompoundFile = false;
      }
      if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
        hasSingleNormFile = (1 == input.readByte());
      } else {
        hasSingleNormFile = false;
      }
      int numNormGen = input.readInt();
      if (numNormGen == NO) {
        normGen = null;
      } else {
        normGen = new long[numNormGen];
        for (int j = 0; j < numNormGen; j++) {
          normGen[j] = input.readLong();
        }
      }
      isCompoundFile = input.readByte();
      preLockless = (isCompoundFile == CHECK_DIR);
    } else {
      delGen = CHECK_DIR;
      normGen = null;
      isCompoundFile = CHECK_DIR;
      preLockless = true;
      hasSingleNormFile = false;
      docStoreOffset = -1;
      docStoreIsCompoundFile = false;
      docStoreSegment = null;
    }
  }

  void setNumFields(int numFields) {
    if (normGen == null) {
      // normGen is null if we loaded a pre-2.1 segment
      // file, or if this segments file hasn't had any
      // norms set against it yet:
      normGen = new long[numFields];

      if (preLockless) {
        // Do nothing: leave normGen[k] == CHECK_DIR (== 0), so that later we
        // know we must check the filesystem for norm files, because this
        // segment is pre-lockless.
      } else {
        // This is a FORMAT_LOCKLESS segment, which means
        // there are no separate norms:
        for (int i = 0; i < numFields; i++) {
          normGen[i] = NO;
        }
      }
    }
  }

  /** Returns the total size in bytes of all files used by
   *  this segment. */
  long sizeInBytes() throws IOException {
    if (sizeInBytes == -1) {
      List files = files();
      final int size = files.size();
      sizeInBytes = 0;
      for (int i = 0; i < size; i++) {
        final String fileName = (String) files.get(i);
        // We don't count bytes used by a shared doc store
        // against this segment:
        if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName))
          sizeInBytes += dir.fileLength(fileName);
      }
    }
    return sizeInBytes;
  }

  boolean hasDeletions() throws IOException {
    // Cases:
    //
    //   delGen == NO: this means this segment was written
    //     by the LOCKLESS code and for certain does not have
    //     deletions yet
    //
    //   delGen == CHECK_DIR: this means this segment was written by
    //     pre-LOCKLESS code which means we must check
    //     directory to see if .del file exists
    //
    //   delGen >= YES: this means this segment was written by
    //     the LOCKLESS code and for certain has
    //     deletions
    //
    if (delGen == NO) {
      return false;
    } else if (delGen >= YES) {
      return true;
    } else {
      return dir.fileExists(getDelFileName());
    }
  }

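  // For example (illustrative): a segment starts out with delGen == NO (-1);
  // the first advanceDelGen() sets it to YES (1) and every later call simply
  // increments it (2, 3, ...), giving each new .del file a fresh generation.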
  void advanceDelGen() {
    // delGen 0 is reserved for pre-LOCKLESS format
    if (delGen == NO) {
      delGen = YES;
    } else {
      delGen++;
    }
    clearFiles();
  }

  void clearDelGen() {
    delGen = NO;
    clearFiles();
  }

  public Object clone() {
    SegmentInfo si = new SegmentInfo(name, docCount, dir);
    si.isCompoundFile = isCompoundFile;
    si.delGen = delGen;
    si.preLockless = preLockless;
    si.hasSingleNormFile = hasSingleNormFile;
    if (normGen != null) {
      si.normGen = (long[]) normGen.clone();
    }
    si.docStoreOffset = docStoreOffset;
    si.docStoreSegment = docStoreSegment;
    si.docStoreIsCompoundFile = docStoreIsCompoundFile;
    return si;
  }

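  // Illustrative result (exact generation stamping is delegated to
  // IndexFileNames.fileNameFromGeneration): for a segment named "_3",
  // delGen == CHECK_DIR yields the un-stamped pre-lockless name "_3.del",
  // while a lockless delGen of 2 yields a stamped name such as "_3_2.del".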
  String getDelFileName() {
    if (delGen == NO) {
      // In this case we know there is no deletion filename
      // against this segment
      return null;
    } else {
      // If delGen is CHECK_DIR, it's the pre-lockless-commit file format
      return IndexFileNames.fileNameFromGeneration(name,
          "." + IndexFileNames.DELETES_EXTENSION, delGen);
    }
  }

  /**
   * Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX).
   *
   * @param fieldNumber the field index to check
   */
  boolean hasSeparateNorms(int fieldNumber) throws IOException {
    if ((normGen == null && preLockless)
        || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) {
      // Must fallback to directory file exists check:
      String fileName = name + ".s" + fieldNumber;
      return dir.fileExists(fileName);
    } else if (normGen == null || normGen[fieldNumber] == NO) {
      return false;
    } else {
      return true;
    }
  }
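
  // e.g. for a pre-lockless segment "_3", field 5, the check above probes the
  // directory for "_3.s5"; for lockless segments the answer comes from
  // normGen[5] alone.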

  /**
   * Returns true if any fields in this segment have separate norms.
   */
  boolean hasSeparateNorms() throws IOException {
    if (normGen == null) {
      if (!preLockless) {
        // This means we were created w/ LOCKLESS code and no
        // norms are written yet:
        return false;
      } else {
        // This means this segment was saved with pre-LOCKLESS
        // code. So we must fallback to the original
        // directory list check:
        String[] result = dir.list();
        if (result == null)
          throw new IOException("cannot read directory " + dir + ": list() returned null");

        String pattern = name + ".s";
        int patternLength = pattern.length();
        for (int i = 0; i < result.length; i++) {
          if (result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength)))
            return true;
        }
        return false;
      }
    } else {
      // This means this segment was saved with LOCKLESS
      // code so we first check whether any normGen's are >= 1
      // (meaning they definitely have separate norms):
      for (int i = 0; i < normGen.length; i++) {
        if (normGen[i] >= YES) {
          return true;
        }
      }
      // Next we look for any == 0. These cases were
      // pre-LOCKLESS and must be checked in directory:
      for (int i = 0; i < normGen.length; i++) {
        if (normGen[i] == CHECK_DIR) {
          if (hasSeparateNorms(i)) {
            return true;
          }
        }
      }
    }

    return false;
  }

  /**
   * Increment the generation count for the norms file for
   * this field.
   *
   * @param fieldIndex field whose norm file will be rewritten
   */
  void advanceNormGen(int fieldIndex) {
    if (normGen[fieldIndex] == NO) {
      normGen[fieldIndex] = YES;
    } else {
      normGen[fieldIndex]++;
    }
    clearFiles();
  }

  /**
   * Get the file name for the norms file for this field.
   *
   * @param number field index
   */
  String getNormFileName(int number) throws IOException {
    String prefix;

    long gen;
    if (normGen == null) {
      gen = CHECK_DIR;
    } else {
      gen = normGen[number];
    }

    if (hasSeparateNorms(number)) {
      // case 1: separate norm
      prefix = ".s";
      return IndexFileNames.fileNameFromGeneration(name, prefix + number, gen);
    }

    if (hasSingleNormFile) {
      // case 2: lockless (or nrm file exists) - single file for all norms
      prefix = "." + IndexFileNames.NORMS_EXTENSION;
      return IndexFileNames.fileNameFromGeneration(name, prefix, WITHOUT_GEN);
    }

    // case 3: norm file for each field
    prefix = ".f";
    return IndexFileNames.fileNameFromGeneration(name, prefix + number, WITHOUT_GEN);
  }
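
  // Illustrative mapping for segment "_3", field 7 (exact generation stamping
  // is delegated to IndexFileNames.fileNameFromGeneration):
  //   case 1, separate norms at gen 2  -> a stamped name such as "_3_2.s7"
  //   case 2, single norms file        -> "_3.nrm"
  //   case 3, per-field norms file     -> "_3.f7"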

  /**
   * Mark whether this segment is stored as a compound file.
   *
   * @param isCompoundFile true if this is a compound file; else, false
   */
  void setUseCompoundFile(boolean isCompoundFile) {
    if (isCompoundFile) {
      this.isCompoundFile = YES;
    } else {
      this.isCompoundFile = NO;
    }
    clearFiles();
  }

  /**
   * Returns true if this segment is stored as a compound
   * file; else, false.
   */
  boolean getUseCompoundFile() throws IOException {
    if (isCompoundFile == NO) {
      return false;
    } else if (isCompoundFile == YES) {
      return true;
    } else {
      return dir.fileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
    }
  }

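  // Accessors for the shared doc store (stored fields & term vectors) state.
  // The setters invalidate the cached file list via clearFiles(), since they
  // change which files belong to this segment.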
  int getDocStoreOffset() {
    return docStoreOffset;
  }

  boolean getDocStoreIsCompoundFile() {
    return docStoreIsCompoundFile;
  }

  void setDocStoreIsCompoundFile(boolean v) {
    docStoreIsCompoundFile = v;
    clearFiles();
  }

  String getDocStoreSegment() {
    return docStoreSegment;
  }

  void setDocStoreOffset(int offset) {
    docStoreOffset = offset;
    clearFiles();
  }

  /**
   * Save this segment's info.
   */
  void write(IndexOutput output) throws IOException {
    output.writeString(name);
    output.writeInt(docCount);
    output.writeLong(delGen);
    output.writeInt(docStoreOffset);
    if (docStoreOffset != -1) {
      output.writeString(docStoreSegment);
      output.writeByte((byte) (docStoreIsCompoundFile ? 1 : 0));
    }

    output.writeByte((byte) (hasSingleNormFile ? 1 : 0));
    if (normGen == null) {
      output.writeInt(NO);
    } else {
      output.writeInt(normGen.length);
      for (int j = 0; j < normGen.length; j++) {
        output.writeLong(normGen[j]);
      }
    }
    output.writeByte(isCompoundFile);
  }
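
  // The record written above is, in order: name (String), docCount (int),
  // delGen (long), docStoreOffset (int), then docStoreSegment (String) and
  // docStoreIsCompoundFile (byte) only when docStoreOffset != -1, then
  // hasSingleNormFile (byte), the normGen count (int, or NO when normGen is
  // null) followed by one long per field, and finally isCompoundFile (byte).
  // The reading constructor above consumes the same layout, guarded by
  // format checks for older segments files.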

  private void addIfExists(List files, String fileName) throws IOException {
    if (dir.fileExists(fileName))
      files.add(fileName);
  }

  /*
   * Return all files referenced by this SegmentInfo. The
   * returned List is a locally cached List, so you should not
   * modify it.
   */
  public List files() throws IOException {

    if (files != null) {
      // Already cached:
      return files;
    }

    files = new ArrayList();

    boolean useCompoundFile = getUseCompoundFile();

    if (useCompoundFile) {
      files.add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
    } else {
      final String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
      for (int i = 0; i < exts.length; i++)
        addIfExists(files, name + "." + exts[i]);
    }

    if (docStoreOffset != -1) {
      // We are sharing doc stores (stored fields, term
      // vectors) with other segments
      assert docStoreSegment != null;
      if (docStoreIsCompoundFile) {
        files.add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
      } else {
        final String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
        for (int i = 0; i < exts.length; i++)
          addIfExists(files, docStoreSegment + "." + exts[i]);
      }
    } else if (!useCompoundFile) {
      // We are not sharing, and these files were not
      // included in the compound file
      final String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
      for (int i = 0; i < exts.length; i++)
        addIfExists(files, name + "." + exts[i]);
    }

    String delFileName = IndexFileNames.fileNameFromGeneration(name,
        "." + IndexFileNames.DELETES_EXTENSION, delGen);
    if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
      files.add(delFileName);
    }

    // Careful logic for norms files
    if (normGen != null) {
      for (int i = 0; i < normGen.length; i++) {
        long gen = normGen[i];
        if (gen >= YES) {
          // Definitely a separate norm file, with generation:
          files.add(IndexFileNames.fileNameFromGeneration(name,
              "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
        } else if (NO == gen) {
          // No separate norms but maybe plain norms
          // in the non compound file case:
          if (!hasSingleNormFile && !useCompoundFile) {
            String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
            if (dir.fileExists(fileName)) {
              files.add(fileName);
            }
          }
        } else if (CHECK_DIR == gen) {
          // Pre-2.1: we have to check file existence
          String fileName = null;
          if (useCompoundFile) {
            fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
          } else if (!hasSingleNormFile) {
            fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
          }
          if (fileName != null && dir.fileExists(fileName)) {
            files.add(fileName);
          }
        }
      }
    } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) {
      // Pre-2.1: we have to scan the dir to find all
      // matching _X.sN/_X.fN files for our segment:
      String prefix;
      if (useCompoundFile)
        prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
      else
        prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
      int prefixLength = prefix.length();
      String[] allFiles = dir.list();
      if (allFiles == null)
        throw new IOException("cannot read directory " + dir + ": list() returned null");
      for (int i = 0; i < allFiles.length; i++) {
        String fileName = allFiles[i];
        if (fileName.length() > prefixLength
            && Character.isDigit(fileName.charAt(prefixLength))
            && fileName.startsWith(prefix)) {
          files.add(fileName);
        }
      }
    }
    return files;
  }

  /* Called whenever any change is made that affects which
   * files this segment has. */
  private void clearFiles() {
    files = null;
    sizeInBytes = -1;
  }

  /** Used for debugging */
  public String segString(Directory dir) {
    String cfs;
    try {
      if (getUseCompoundFile())
        cfs = "c";
      else
        cfs = "C";
    } catch (IOException ioe) {
      cfs = "?";
    }

    String docStore;

    if (docStoreOffset != -1)
      docStore = "->" + docStoreSegment;
    else
      docStore = "";

    return name + ":" + cfs + (this.dir == dir ? "" : "x") + docCount + docStore;
  }
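
  // e.g. "_3:c1042" for a compound-file segment with 1042 docs that lives in
  // the given directory; an "x" after the c/C marks an external directory and
  // a trailing "->_2" marks stored fields/vectors shared with doc store
  // segment "_2".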

  /** We consider another SegmentInfo instance equal if it
   *  has the same dir and same name. */
  public boolean equals(Object obj) {
    if (!(obj instanceof SegmentInfo)) {
      // Covers null and foreign types, per the equals() contract:
      return false;
    }
    SegmentInfo other = (SegmentInfo) obj;
    return other.dir == dir && other.name.equals(name);
  }

  public int hashCode() {
    return dir.hashCode() + name.hashCode();
  }
}