001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021: import java.util.Arrays;
022:
023: import junit.framework.TestCase;
024:
025: import org.apache.lucene.index.Term;
026: import org.apache.lucene.index.IndexWriter;
027: import org.apache.lucene.index.IndexReader;
028: import org.apache.lucene.search.IndexSearcher;
029: import org.apache.lucene.search.Similarity;
030: import org.apache.lucene.search.DefaultSimilarity;
031: import org.apache.lucene.search.TermQuery;
032: import org.apache.lucene.search.HitCollector;
033: import org.apache.lucene.store.RAMDirectory;
034: import org.apache.lucene.store.Directory;
035: import org.apache.lucene.analysis.SimpleAnalyzer;
036: import org.apache.lucene.document.Document;
037: import org.apache.lucene.document.Field;
038:
039: /**
040: * Tests changing of field norms with a custom similarity and with fake norms.
041: *
042: * @version $Id$
043: */
044: public class TestFieldNormModifier extends TestCase {
045: public TestFieldNormModifier(String name) {
046: super (name);
047: }
048:
049: public static byte DEFAULT_NORM = Similarity.encodeNorm(1.0f);
050:
051: public static int NUM_DOCS = 5;
052:
053: public Directory store = new RAMDirectory();
054:
055: /** inverts the normal notion of lengthNorm */
056: public static Similarity s = new DefaultSimilarity() {
057: public float lengthNorm(String fieldName, int numTokens) {
058: return (float) numTokens;
059: }
060: };
061:
062: public void setUp() throws Exception {
063: IndexWriter writer = new IndexWriter(store,
064: new SimpleAnalyzer(), true);
065:
066: for (int i = 0; i < NUM_DOCS; i++) {
067: Document d = new Document();
068: d.add(new Field("field", "word", Field.Store.YES,
069: Field.Index.TOKENIZED));
070: d.add(new Field("nonorm", "word", Field.Store.YES,
071: Field.Index.NO_NORMS));
072: d.add(new Field("untokfield", "20061212 20071212",
073: Field.Store.YES, Field.Index.TOKENIZED));
074:
075: for (int j = 1; j <= i; j++) {
076: d.add(new Field("field", "crap", Field.Store.YES,
077: Field.Index.TOKENIZED));
078: d.add(new Field("nonorm", "more words",
079: Field.Store.YES, Field.Index.NO_NORMS));
080: }
081: writer.addDocument(d);
082: }
083: writer.close();
084: }
085:
086: public void testMissingField() {
087: FieldNormModifier fnm = new FieldNormModifier(store, s);
088: try {
089: fnm.reSetNorms("nobodyherebutuschickens");
090: } catch (Exception e) {
091: assertNull("caught something", e);
092: }
093: }
094:
095: public void testFieldWithNoNorm() throws Exception {
096:
097: IndexReader r = IndexReader.open(store);
098: byte[] norms = r.norms("nonorm");
099:
100: // sanity check, norms should all be 1
101: assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
102: for (int i = 0; i < norms.length; i++) {
103: assertEquals("" + i, DEFAULT_NORM, norms[i]);
104: }
105:
106: r.close();
107:
108: FieldNormModifier fnm = new FieldNormModifier(store, s);
109: try {
110: fnm.reSetNorms("nonorm");
111: } catch (Exception e) {
112: assertNull("caught something", e);
113: }
114:
115: // nothing should have changed
116: r = IndexReader.open(store);
117:
118: norms = r.norms("nonorm");
119: assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
120: for (int i = 0; i < norms.length; i++) {
121: assertEquals("" + i, DEFAULT_NORM, norms[i]);
122: }
123:
124: r.close();
125: }
126:
127: public void testGoodCases() throws Exception {
128:
129: IndexSearcher searcher = new IndexSearcher(store);
130: final float[] scores = new float[NUM_DOCS];
131: float lastScore = 0.0f;
132:
133: // default similarity should put docs with shorter length first
134: searcher.search(new TermQuery(new Term("field", "word")),
135: new HitCollector() {
136: public final void collect(int doc, float score) {
137: scores[doc] = score;
138: }
139: });
140: searcher.close();
141:
142: lastScore = Float.MAX_VALUE;
143: for (int i = 0; i < NUM_DOCS; i++) {
144: String msg = "i=" + i + ", " + scores[i] + " <= "
145: + lastScore;
146: assertTrue(msg, scores[i] <= lastScore);
147: //System.out.println(msg);
148: lastScore = scores[i];
149: }
150:
151: FieldNormModifier fnm = new FieldNormModifier(store, s);
152: fnm.reSetNorms("field");
153:
154: // new norm (with default similarity) should put longer docs first
155: searcher = new IndexSearcher(store);
156: searcher.search(new TermQuery(new Term("field", "word")),
157: new HitCollector() {
158: public final void collect(int doc, float score) {
159: scores[doc] = score;
160: }
161: });
162: searcher.close();
163:
164: lastScore = 0.0f;
165: for (int i = 0; i < NUM_DOCS; i++) {
166: String msg = "i=" + i + ", " + scores[i] + " >= "
167: + lastScore;
168: assertTrue(msg, scores[i] >= lastScore);
169: //System.out.println(msg);
170: lastScore = scores[i];
171: }
172: }
173:
174: public void testNormKiller() throws IOException {
175:
176: IndexReader r = IndexReader.open(store);
177: byte[] oldNorms = r.norms("untokfield");
178: r.close();
179:
180: FieldNormModifier fnm = new FieldNormModifier(store, s);
181: fnm.reSetNorms("untokfield");
182:
183: r = IndexReader.open(store);
184: byte[] newNorms = r.norms("untokfield");
185: r.close();
186: assertFalse(Arrays.equals(oldNorms, newNorms));
187:
188: // verify that we still get documents in the same order as originally
189: IndexSearcher searcher = new IndexSearcher(store);
190: final float[] scores = new float[NUM_DOCS];
191: float lastScore = 0.0f;
192:
193: // default similarity should return the same score for all documents for this query
194: searcher.search(new TermQuery(
195: new Term("untokfield", "20061212")),
196: new HitCollector() {
197: public final void collect(int doc, float score) {
198: scores[doc] = score;
199: }
200: });
201: searcher.close();
202:
203: lastScore = scores[0];
204: for (int i = 0; i < NUM_DOCS; i++) {
205: String msg = "i=" + i + ", " + scores[i] + " == "
206: + lastScore;
207: assertTrue(msg, scores[i] == lastScore);
208: //System.out.println(msg);
209: lastScore = scores[i];
210: }
211: }
212: }
|