001: package org.apache.lucene.misc;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import junit.framework.TestCase;
021:
022: import org.apache.lucene.index.Term;
023: import org.apache.lucene.index.IndexWriter;
024: import org.apache.lucene.index.IndexReader;
025: import org.apache.lucene.search.IndexSearcher;
026: import org.apache.lucene.search.Similarity;
027: import org.apache.lucene.search.DefaultSimilarity;
028: import org.apache.lucene.search.TermQuery;
029: import org.apache.lucene.search.HitCollector;
030: import org.apache.lucene.store.RAMDirectory;
031: import org.apache.lucene.store.Directory;
032: import org.apache.lucene.analysis.SimpleAnalyzer;
033: import org.apache.lucene.document.Document;
034: import org.apache.lucene.document.Field;
035:
036: /**
037: * Tests changing the norms after changing the simularity
038: *
039: * @version $Id:$
040: */
041: public class TestLengthNormModifier extends TestCase {
042: public TestLengthNormModifier(String name) {
043: super (name);
044: }
045:
046: public static byte DEFAULT_NORM = Similarity.encodeNorm(1.0f);
047:
048: public static int NUM_DOCS = 5;
049:
050: public Directory store = new RAMDirectory();
051:
052: /** inverts the normal notion of lengthNorm */
053: public static Similarity s = new DefaultSimilarity() {
054: public float lengthNorm(String fieldName, int numTokens) {
055: return (float) numTokens;
056: }
057: };
058:
059: public void setUp() throws Exception {
060: IndexWriter writer = new IndexWriter(store,
061: new SimpleAnalyzer(), true);
062:
063: IndexSearcher searcher;
064:
065: for (int i = 0; i < NUM_DOCS; i++) {
066: Document d = new Document();
067: d.add(new Field("field", "word", Field.Store.YES,
068: Field.Index.TOKENIZED));
069: d.add(new Field("nonorm", "word", Field.Store.YES,
070: Field.Index.NO_NORMS));
071:
072: for (int j = 1; j <= i; j++) {
073: d.add(new Field("field", "crap", Field.Store.YES,
074: Field.Index.TOKENIZED));
075: d.add(new Field("nonorm", "more words",
076: Field.Store.YES, Field.Index.NO_NORMS));
077: }
078: writer.addDocument(d);
079: }
080: writer.close();
081: }
082:
083: public void testMissingField() {
084: LengthNormModifier lnm = new LengthNormModifier(store, s);
085: try {
086: lnm.reSetNorms("nobodyherebutuschickens");
087: } catch (Exception e) {
088: assertNull("caught something", e);
089: }
090: }
091:
092: public void testFieldWithNoNorm() throws Exception {
093:
094: IndexReader r = IndexReader.open(store);
095: byte[] norms = r.norms("nonorm");
096:
097: // sanity check, norms should all be 1
098: assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
099: for (int i = 0; i < norms.length; i++) {
100: assertEquals("" + i, DEFAULT_NORM, norms[i]);
101: }
102:
103: r.close();
104:
105: LengthNormModifier lnm = new LengthNormModifier(store, s);
106: try {
107: lnm.reSetNorms("nonorm");
108: } catch (Exception e) {
109: assertNull("caught something", e);
110: }
111:
112: // nothing should have changed
113: r = IndexReader.open(store);
114:
115: norms = r.norms("nonorm");
116: assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
117: for (int i = 0; i < norms.length; i++) {
118: assertEquals("" + i, DEFAULT_NORM, norms[i]);
119: }
120:
121: r.close();
122:
123: }
124:
125: public void testGoodCases() throws Exception {
126:
127: IndexSearcher searcher;
128: final float[] scores = new float[NUM_DOCS];
129: float lastScore = 0.0f;
130:
131: // default similarity should put docs with shorter length first
132: searcher = new IndexSearcher(store);
133: searcher.search(new TermQuery(new Term("field", "word")),
134: new HitCollector() {
135: public final void collect(int doc, float score) {
136: scores[doc] = score;
137: }
138: });
139: searcher.close();
140:
141: lastScore = Float.MAX_VALUE;
142: for (int i = 0; i < NUM_DOCS; i++) {
143: String msg = "i=" + i + ", " + scores[i] + " <= "
144: + lastScore;
145: assertTrue(msg, scores[i] <= lastScore);
146: //System.out.println(msg);
147: lastScore = scores[i];
148: }
149:
150: // override the norms to be inverted
151: Similarity s = new DefaultSimilarity() {
152: public float lengthNorm(String fieldName, int numTokens) {
153: return (float) numTokens;
154: }
155: };
156: LengthNormModifier lnm = new LengthNormModifier(store, s);
157: lnm.reSetNorms("field");
158:
159: // new norm (with default similarity) should put longer docs first
160: searcher = new IndexSearcher(store);
161: searcher.search(new TermQuery(new Term("field", "word")),
162: new HitCollector() {
163: public final void collect(int doc, float score) {
164: scores[doc] = score;
165: }
166: });
167: searcher.close();
168:
169: lastScore = 0.0f;
170: for (int i = 0; i < NUM_DOCS; i++) {
171: String msg = "i=" + i + ", " + scores[i] + " >= "
172: + lastScore;
173: assertTrue(msg, scores[i] >= lastScore);
174: //System.out.println(msg);
175: lastScore = scores[i];
176: }
177:
178: }
179: }
|