package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;

/*
  Verify that IndexFileDeleter removes leftover, unreferenced
  files from the index directory: we plant a number of bogus
  index files and then check that opening and closing an
  IndexWriter deletes exactly those files and nothing more.
*/

public class TestIndexFileDeleter extends LuceneTestCase {
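
  /**
   * Plants extra .del files, extra separate-norms files, and bogus
   * segment and segments_N files, then verifies that opening and
   * closing an IndexWriter removes exactly the unreferenced files.
   */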
  public void testDeleteLeftoverFiles() throws IOException {

    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
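    // Flush a new segment every 10 buffered documents: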
    writer.setMaxBufferedDocs(10);
    int i;
    for (i = 0; i < 35; i++) {
      addDoc(writer, i);
    }
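
    // Switch off the compound-file format so the remaining docs
    // create segments stored as individual files: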
    writer.setUseCompoundFile(false);
    for (; i < 45; i++) {
      addDoc(writer, i);
    }
    writer.close();

    // Delete one doc so we get a .del file:
    IndexReader reader = IndexReader.open(dir);
    Term searchTerm = new Term("id", "7");
    int delCount = reader.deleteDocuments(searchTerm);
    assertEquals("didn't delete the right number of documents", 1, delCount);

    // Set one norm so we get a .s0 file:
    reader.setNorm(21, "content", (float) 1.5);
    reader.close();

    // Now, artificially create an extra .del file & extra
    // .s0 file:
    String[] files = dir.list();

    /*
    for(int j=0;j<files.length;j++) {
      System.out.println(j + ": " + files[j]);
    }
    */

    // The numbering of fields can vary depending on which
    // JRE is in use.  On some JREs we see content bound to
    // field 0; on others, field 1.  So, here we have to
    // figure out which field number corresponds to
    // "content", and then set our expected file names below
    // accordingly:
    CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
    FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
    int contentFieldIndex = -1;
    for (i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.name.equals("content")) {
        contentFieldIndex = i;
        break;
      }
    }
    cfsReader.close();
    assertTrue("could not locate the 'content' field number in the _2.cfs segment",
               contentFieldIndex != -1);

    String normSuffix = "s" + contentFieldIndex;

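    // The bogus files below follow Lucene's index file naming:
    //   _<segment>_<gen>.s<N>  separate norms for field number N
    //   _<segment>_<gen>.del   deletions for a segment
    //   _<segment>.cfs         a compound-file segment
    //   segments_<N>           a commit's segments file
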
    // Create a bogus separate norms file for a
    // segment/field that already has a separate norms file:
    copyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);

    // Create a bogus separate norms file for a
    // segment/field that already has a separate norms file,
    // using the non-compound-file extension:
    copyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);

    // Create a bogus separate norms file for a
    // segment/field that does not yet have a separate norms file:
    copyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);

    // Create a bogus separate norms file for a
    // segment/field that does not yet have a separate norms file,
    // using the non-compound-file extension:
    copyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);

    // Create a bogus separate del file for a
    // segment that already has a separate del file:
    copyFile(dir, "_0_1.del", "_0_2.del");

    // Create a bogus separate del file for a
    // segment that does not yet have a separate del file:
    copyFile(dir, "_0_1.del", "_1_1.del");

    // Create a bogus separate del file for a
    // non-existent segment:
    copyFile(dir, "_0_1.del", "_188_1.del");

    // Create a bogus segment file:
    copyFile(dir, "_0.cfs", "_188.cfs");

    // Create a bogus fnm file when the CFS already exists:
    copyFile(dir, "_0.cfs", "_0.fnm");

    // Create a deletable file:
    copyFile(dir, "_0.cfs", "deletable");

    // Create some old segments files:
    copyFile(dir, "segments_a", "segments");
    copyFile(dir, "segments_a", "segments_2");

    // Create a bogus cfs file shadowing a non-cfs segment:
    copyFile(dir, "_2.cfs", "_3.cfs");

    String[] filesPre = dir.list();

    // Open and close a writer: it should delete all of the
    // bogus files planted above and nothing more:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    writer.close();

    String[] files2 = dir.list();
    dir.close();

    Arrays.sort(files);
    Arrays.sort(files2);

    if (!Arrays.equals(files, files2)) {
      fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted "
           + (filesPre.length - files.length)
           + " files but only deleted "
           + (filesPre.length - files2.length)
           + "; expected files:\n    "
           + asString(files)
           + "\n  actual files:\n    "
           + asString(files2));
    }
  }
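
  // Renders a file listing one name per line, for use in the
  // failure message above: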
  private String asString(String[] l) {
    String s = "";
    for (int i = 0; i < l.length; i++) {
      if (i > 0) {
        s += "\n    ";
      }
      s += l[i];
    }
    return s;
  }
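
  // Copies src to dest within the same Directory, in 1 KB
  // chunks; used above to plant the bogus index files: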
  public void copyFile(Directory dir, String src, String dest) throws IOException {
    IndexInput in = dir.openInput(src);
    IndexOutput out = dir.createOutput(dest);
    byte[] b = new byte[1024];
    long remainder = in.length();
    while (remainder > 0) {
      int len = (int) Math.min(b.length, remainder);
      in.readBytes(b, 0, len);
      out.writeBytes(b, len);
      remainder -= len;
    }
    in.close();
    out.close();
  }
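
  // Adds a small document: a tokenized "content" field and an
  // untokenized "id" field used for delete-by-term: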
  private void addDoc(IndexWriter writer, int id) throws IOException {
    Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
    doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED));
    writer.addDocument(doc);
  }
}