package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

/**
 * Test that norms info is preserved during index life - including
 * separate norms, addDocument, addIndexes, optimize.
 */
public class TestNorms extends LuceneTestCase {

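  /** Similarity that always returns a length norm of 1, so that a field's stored norm reflects only its boost. */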
  private class SimilarityOne extends DefaultSimilarity {
    public float lengthNorm(String fieldName, int numTerms) {
      return 1;
    }
  }

  private static final int NUM_FIELDS = 10;

  private Similarity similarityOne;
  private Analyzer anlzr;
  private int numDocNorms;
  private ArrayList norms;
  private ArrayList modifiedNorms;
  private float lastNorm = 0;
  private float normDelta = (float) 0.001;

  public TestNorms(String s) {
    super(s);
  }

  protected void setUp() throws Exception {
    super.setUp();
    similarityOne = new SimilarityOne();
    anlzr = new StandardAnalyzer();
  }

  /**
   * Test that norm values are preserved as the index is maintained,
   * including separate norms, merging indexes with separate norms,
   * and optimize.
   */
  public void testNorms() throws IOException {
    // tmp dir
    String tempDir = System.getProperty("java.io.tmpdir");
    if (tempDir == null) {
      throw new IOException("java.io.tmpdir undefined, cannot run test");
    }

    // test with a single index: index1
    File indexDir1 = new File(tempDir, "lucenetestindex1");
    Directory dir1 = FSDirectory.getDirectory(indexDir1);

    norms = new ArrayList();
    modifiedNorms = new ArrayList();

    createIndex(dir1);
    doTestNorms(dir1);

    // test with a single index: index2
    ArrayList norms1 = norms;
    ArrayList modifiedNorms1 = modifiedNorms;
    int numDocNorms1 = numDocNorms;

    norms = new ArrayList();
    modifiedNorms = new ArrayList();
    numDocNorms = 0;

    File indexDir2 = new File(tempDir, "lucenetestindex2");
    Directory dir2 = FSDirectory.getDirectory(indexDir2);

    createIndex(dir2);
    doTestNorms(dir2);

    // add index1 and index2 to a third index: index3
    File indexDir3 = new File(tempDir, "lucenetestindex3");
    Directory dir3 = FSDirectory.getDirectory(indexDir3);

    createIndex(dir3);
    IndexWriter iw = new IndexWriter(dir3, anlzr, false);
    iw.setMaxBufferedDocs(5);
    iw.setMergeFactor(3);
    iw.addIndexes(new Directory[] { dir1, dir2 });
    iw.close();

    norms1.addAll(norms);
    norms = norms1;
    modifiedNorms1.addAll(modifiedNorms);
    modifiedNorms = modifiedNorms1;
    numDocNorms += numDocNorms1;

    // test with index3
    verifyIndex(dir3);
    doTestNorms(dir3);

    // now with optimize
    iw = new IndexWriter(dir3, anlzr, false);
    iw.setMaxBufferedDocs(5);
    iw.setMergeFactor(3);
    iw.optimize();
    iw.close();
    verifyIndex(dir3);

    dir1.close();
    dir2.close();
    dir3.close();
  }

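  /**
   * Repeatedly add documents (alternating compound and non-compound segments),
   * modify the norms of field "f1", and verify the index after each step.
   */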
  private void doTestNorms(Directory dir) throws IOException {
    for (int i = 0; i < 5; i++) {
      addDocs(dir, 12, true);
      verifyIndex(dir);
      modifyNormsForF1(dir);
      verifyIndex(dir);
      addDocs(dir, 12, false);
      verifyIndex(dir);
      modifyNormsForF1(dir);
      verifyIndex(dir);
    }
  }

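  /** Create a new (empty) index in the given directory. */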
  private void createIndex(Directory dir) throws IOException {
    IndexWriter iw = new IndexWriter(dir, anlzr, true);
    iw.setMaxBufferedDocs(5);
    iw.setMergeFactor(3);
    iw.setSimilarity(similarityOne);
    iw.setUseCompoundFile(true);
    iw.close();
  }

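  /**
   * For every third document, swap its "f1" norm with the norm of another document
   * via IndexReader.setNorm(), keeping the in-memory modifiedNorms list in sync.
   */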
  private void modifyNormsForF1(Directory dir) throws IOException {
    IndexReader ir = IndexReader.open(dir);
    int n = ir.maxDoc();
    for (int i = 0; i < n; i += 3) { // modify for every third doc
      int k = (i * 3) % modifiedNorms.size();
      float origNorm = ((Float) modifiedNorms.get(i)).floatValue();
      float newNorm = ((Float) modifiedNorms.get(k)).floatValue();
      //System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm);
      //System.out.println("      and: for "+k+" from "+newNorm+" to "+origNorm);
      modifiedNorms.set(i, new Float(newNorm));
      modifiedNorms.set(k, new Float(origNorm));
      ir.setNorm(i, "f" + 1, newNorm);
      ir.setNorm(k, "f" + 1, origNorm);
    }
    ir.close();
  }

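  /** Verify that the norms stored in the index match the expected values recorded in norms (modifiedNorms for field "f1"). */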
  private void verifyIndex(Directory dir) throws IOException {
    IndexReader ir = IndexReader.open(dir);
    for (int i = 0; i < NUM_FIELDS; i++) {
      String field = "f" + i;
      byte[] b = ir.norms(field);
      assertEquals("number of norms mismatches", numDocNorms, b.length);
      ArrayList storedNorms = (i == 1 ? modifiedNorms : norms);
      for (int j = 0; j < b.length; j++) {
        float norm = Similarity.decodeNorm(b[j]);
        float norm1 = ((Float) storedNorms.get(j)).floatValue();
        assertEquals("stored norm value of " + field + " for doc " + j
            + " is " + norm + " - a mismatch!", norm, norm1, 0.000001);
      }
    }
    ir.close(); // release the reader opened for verification
  }

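  /** Add ndocs documents to the index, using the compound file format for new segments iff compound is true. */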
  private void addDocs(Directory dir, int ndocs, boolean compound) throws IOException {
    IndexWriter iw = new IndexWriter(dir, anlzr, false);
    iw.setMaxBufferedDocs(5);
    iw.setMergeFactor(3);
    iw.setSimilarity(similarityOne);
    iw.setUseCompoundFile(compound);
    for (int i = 0; i < ndocs; i++) {
      iw.addDocument(newDoc());
    }
    iw.close();
  }

  // create the next document: NUM_FIELDS untokenized fields, all carrying the same boost (the next norm value)
  private Document newDoc() {
    Document d = new Document();
    float boost = nextNorm();
    for (int i = 0; i < NUM_FIELDS; i++) {
      Field f = new Field("f" + i, "v" + i, Store.NO, Index.UN_TOKENIZED);
      f.setBoost(boost);
      d.add(f);
    }
    return d;
  }

  // return unique norm values that are unchanged by encoding/decoding
  private float nextNorm() {
    float norm = lastNorm + normDelta;
    do {
      float norm1 = Similarity.decodeNorm(Similarity.encodeNorm(norm));
      if (norm1 > lastNorm) {
        //System.out.println(norm1+" > "+lastNorm);
        norm = norm1;
        break;
      }
      norm += normDelta;
    } while (true);
    norms.add(numDocNorms, new Float(norm));
    modifiedNorms.add(numDocNorms, new Float(norm));
    //System.out.println("creating norm("+numDocNorms+"): "+norm);
    numDocNorms++;
    lastNorm = (norm > 10 ? 0 : norm); // there is a limit to how many distinct values can be stored in a single byte
    return norm;
  }

}