001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.lucene.misc;
017:
018: import org.apache.lucene.index.Term;
019: import org.apache.lucene.search.Similarity;
020: import org.apache.lucene.search.DefaultSimilarity;
021: import org.apache.lucene.search.Query;
022: import org.apache.lucene.search.TermQuery;
023: import org.apache.lucene.search.PhraseQuery;
024: import org.apache.lucene.search.DisjunctionMaxQuery;
025: import org.apache.lucene.search.BooleanQuery;
026: import org.apache.lucene.search.BooleanClause;
027: import org.apache.lucene.search.BooleanClause.Occur;
028:
029: import junit.framework.Test;
030: import junit.framework.TestCase;
031: import junit.framework.TestSuite;
032:
033: import java.io.File;
034: import java.math.BigDecimal;
035: import java.util.Random;
036: import java.util.Date;
037: import java.util.List;
038: import java.util.Arrays;
039: import java.util.Map;
040: import java.util.HashMap;
041: import java.util.Iterator;
042:
043: /**
044: * Test of the SweetSpotSimilarity
045: */
046: public class SweetSpotSimilarityTest extends TestCase {
047:
048: public void testSweetSpotLengthNorm() {
049:
050: SweetSpotSimilarity ss = new SweetSpotSimilarity();
051: ss.setLengthNormFactors(1, 1, 0.5f);
052:
053: Similarity d = new DefaultSimilarity();
054: Similarity s = ss;
055:
056: // base case, should degrade
057:
058: for (int i = 1; i < 1000; i++) {
059: assertEquals("base case: i=" + i, d.lengthNorm("foo", i), s
060: .lengthNorm("foo", i), 0.0f);
061: }
062:
063: // make a sweet spot
064:
065: ss.setLengthNormFactors(3, 10, 0.5f);
066:
067: for (int i = 3; i <= 10; i++) {
068: assertEquals("3,10: spot i=" + i, 1.0f, s.lengthNorm("foo",
069: i), 0.0f);
070: }
071:
072: for (int i = 10; i < 1000; i++) {
073: assertEquals("3,10: 10<x : i=" + i, d.lengthNorm("foo",
074: i - 9), s.lengthNorm("foo", i), 0.0f);
075: }
076:
077: // seperate sweet spot for certain fields
078:
079: ss.setLengthNormFactors("bar", 8, 13, 0.5f);
080: ss.setLengthNormFactors("yak", 6, 9, 0.5f);
081:
082: for (int i = 3; i <= 10; i++) {
083: assertEquals("f: 3,10: spot i=" + i, 1.0f, s.lengthNorm(
084: "foo", i), 0.0f);
085: }
086: for (int i = 10; i < 1000; i++) {
087: assertEquals("f: 3,10: 10<x : i=" + i, d.lengthNorm("foo",
088: i - 9), s.lengthNorm("foo", i), 0.0f);
089: }
090: for (int i = 8; i <= 13; i++) {
091: assertEquals("f: 8,13: spot i=" + i, 1.0f, s.lengthNorm(
092: "bar", i), 0.0f);
093: }
094: for (int i = 6; i <= 9; i++) {
095: assertEquals("f: 6,9: spot i=" + i, 1.0f, s.lengthNorm(
096: "yak", i), 0.0f);
097: }
098: for (int i = 13; i < 1000; i++) {
099: assertEquals("f: 8,13: 13<x : i=" + i, d.lengthNorm("foo",
100: i - 12), s.lengthNorm("bar", i), 0.0f);
101: }
102: for (int i = 9; i < 1000; i++) {
103: assertEquals("f: 6,9: 9<x : i=" + i, d.lengthNorm("foo",
104: i - 8), s.lengthNorm("yak", i), 0.0f);
105: }
106:
107: // steepness
108:
109: ss.setLengthNormFactors("a", 5, 8, 0.5f);
110: ss.setLengthNormFactors("b", 5, 8, 0.1f);
111:
112: for (int i = 9; i < 1000; i++) {
113: assertTrue("s: i=" + i + " : a=" + ss.lengthNorm("a", i)
114: + " < b=" + ss.lengthNorm("b", i), ss.lengthNorm(
115: "a", i) < s.lengthNorm("b", i));
116: }
117:
118: }
119:
120: public void testSweetSpotTf() {
121:
122: SweetSpotSimilarity ss = new SweetSpotSimilarity();
123:
124: Similarity d = new DefaultSimilarity();
125: Similarity s = ss;
126:
127: // tf equal
128:
129: ss.setBaselineTfFactors(0.0f, 0.0f);
130:
131: for (int i = 1; i < 1000; i++) {
132: assertEquals("tf: i=" + i, d.tf(i), s.tf(i), 0.0f);
133: }
134:
135: // tf higher
136:
137: ss.setBaselineTfFactors(1.0f, 0.0f);
138:
139: for (int i = 1; i < 1000; i++) {
140: assertTrue("tf: i=" + i + " : d=" + d.tf(i) + " < s="
141: + s.tf(i), d.tf(i) < s.tf(i));
142: }
143:
144: // tf flat
145:
146: ss.setBaselineTfFactors(1.0f, 6.0f);
147: for (int i = 1; i <= 6; i++) {
148: assertEquals("tf flat1: i=" + i, 1.0f, s.tf(i), 0.0f);
149: }
150: ss.setBaselineTfFactors(2.0f, 6.0f);
151: for (int i = 1; i <= 6; i++) {
152: assertEquals("tf flat2: i=" + i, 2.0f, s.tf(i), 0.0f);
153: }
154: for (int i = 6; i <= 1000; i++) {
155: assertTrue("tf: i=" + i + " : s=" + s.tf(i) + " < d="
156: + d.tf(i), s.tf(i) < d.tf(i));
157: }
158:
159: // stupidity
160: assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
161: }
162:
163: public void testHyperbolicSweetSpot() {
164:
165: SweetSpotSimilarity ss = new SweetSpotSimilarity() {
166: public float tf(int freq) {
167: return hyperbolicTf(freq);
168: }
169: };
170: ss.setHyperbolicTfFactors(3.3f, 7.7f, Math.E, 5.0f);
171:
172: Similarity s = ss;
173:
174: for (int i = 1; i <= 1000; i++) {
175: assertTrue("MIN tf: i=" + i + " : s=" + s.tf(i), 3.3f <= s
176: .tf(i));
177: assertTrue("MAX tf: i=" + i + " : s=" + s.tf(i),
178: s.tf(i) <= 7.7f);
179: }
180: assertEquals("MID tf", 3.3f + (7.7f - 3.3f) / 2.0f, s.tf(5),
181: 0.00001f);
182:
183: // stupidity
184: assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
185:
186: }
187:
188: }
|