001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.search;
017:
018: import junit.framework.TestCase;
019:
020: import java.util.Random;
021:
022: import org.apache.solr.util.OpenBitSet;
023: import org.apache.solr.util.BitSetIterator;
024: import org.apache.solr.util.BitUtil;
025:
026: /**
027: * @author yonik
028: * @version $Id$
029: */
030: public class TestDocSet extends TestCase {
031: Random rand = new Random();
032: float loadfactor;
033:
034: public OpenBitSet getRandomSet(int sz, int bitsToSet) {
035: OpenBitSet bs = new OpenBitSet(sz);
036: if (sz == 0)
037: return bs;
038: for (int i = 0; i < bitsToSet; i++) {
039: bs.fastSet(rand.nextInt(sz));
040: }
041: return bs;
042: }
043:
044: public DocSet getHashDocSet(OpenBitSet bs) {
045: int[] docs = new int[(int) bs.cardinality()];
046: BitSetIterator iter = new BitSetIterator(bs);
047: for (int i = 0; i < docs.length; i++) {
048: docs[i] = iter.next();
049: }
050: return new HashDocSet(docs, 0, docs.length);
051: }
052:
053: public DocSet getBitDocSet(OpenBitSet bs) {
054: return new BitDocSet(bs);
055: }
056:
057: public DocSet getDocSet(OpenBitSet bs) {
058: return rand.nextInt(2) == 0 ? getHashDocSet(bs)
059: : getBitDocSet(bs);
060: }
061:
062: public void checkEqual(OpenBitSet bs, DocSet set) {
063: for (int i = 0; i < bs.capacity(); i++) {
064: assertEquals(bs.get(i), set.exists(i));
065: }
066: }
067:
068: protected void doSingle(int maxSize) {
069: int sz = rand.nextInt(maxSize + 1);
070: int sz2 = rand.nextInt(maxSize);
071: OpenBitSet a1 = getRandomSet(sz, rand.nextInt(sz + 1));
072: OpenBitSet a2 = getRandomSet(sz, rand.nextInt(sz2 + 1));
073:
074: DocSet b1 = getDocSet(a1);
075: DocSet b2 = getDocSet(a2);
076:
077: // System.out.println("b1="+b1+", b2="+b2);
078:
079: assertEquals((int) a1.cardinality(), b1.size());
080: assertEquals((int) a2.cardinality(), b2.size());
081:
082: checkEqual(a1, b1);
083: checkEqual(a2, b2);
084:
085: OpenBitSet a_and = (OpenBitSet) a1.clone();
086: a_and.and(a2);
087: OpenBitSet a_or = (OpenBitSet) a1.clone();
088: a_or.or(a2);
089: // OpenBitSet a_xor = (OpenBitSet)a1.clone(); a_xor.xor(a2);
090: OpenBitSet a_andn = (OpenBitSet) a1.clone();
091: a_andn.andNot(a2);
092:
093: checkEqual(a_and, b1.intersection(b2));
094: checkEqual(a_or, b1.union(b2));
095: checkEqual(a_andn, b1.andNot(b2));
096:
097: assertEquals(a_and.cardinality(), b1.intersectionSize(b2));
098: assertEquals(a_or.cardinality(), b1.unionSize(b2));
099: assertEquals(a_andn.cardinality(), b1.andNotSize(b2));
100: }
101:
102: public void doMany(int maxSz, int iter) {
103: for (int i = 0; i < iter; i++) {
104: doSingle(maxSz);
105: }
106: }
107:
108: public void testRandomDocSets() {
109: doMany(300, 5000);
110: }
111:
112: public HashDocSet getRandomHashDocset(int maxSetSize, int maxDoc) {
113: int n = rand.nextInt(maxSetSize);
114: OpenBitSet obs = new OpenBitSet(maxDoc);
115: int[] a = new int[n];
116: for (int i = 0; i < n; i++) {
117: for (;;) {
118: int idx = rand.nextInt(maxDoc);
119: if (obs.getAndSet(idx))
120: continue;
121: a[i] = idx;
122: break;
123: }
124: }
125: return loadfactor != 0 ? new HashDocSet(a, 0, n, 1 / loadfactor)
126: : new HashDocSet(a, 0, n);
127: }
128:
129: public DocSet[] getRandomHashSets(int nSets, int maxSetSize,
130: int maxDoc) {
131: DocSet[] sets = new DocSet[nSets];
132:
133: for (int i = 0; i < nSets; i++) {
134: sets[i] = getRandomHashDocset(maxSetSize, maxDoc);
135: }
136:
137: return sets;
138: }
139:
140: /**** needs code insertion into HashDocSet
141: public void testCollisions() {
142: loadfactor=.75f;
143: rand=new Random(12345); // make deterministic
144: int maxSetsize=4000;
145: int nSets=256;
146: int iter=1;
147: int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
148: int ret=0;
149: long start=System.currentTimeMillis();
150: for (int maxDoc : maxDocs) {
151: int cstart = HashDocSet.collisions;
152: DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
153: for (DocSet s1 : sets) {
154: for (DocSet s2 : sets) {
155: if (s1!=s2) ret += s1.intersectionSize(s2);
156: }
157: }
158: int cend = HashDocSet.collisions;
159: System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
160: }
161: long end=System.currentTimeMillis();
162: System.out.println("testIntersectionSizePerformance="+(end-start)+" ms");
163: if (ret==-1)System.out.println("wow!");
164: System.out.println("collisions="+HashDocSet.collisions);
165:
166: }
167: ***/
168:
169: /***
170: public void testIntersectionSizePerformance() {
171: loadfactor=.75f;
172: rand=new Random(12345); // make deterministic
173: int maxSetsize=4000;
174: int nSets=128;
175: int iter=10;
176: int maxDoc=1000000;
177: DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
178: int ret=0;
179: long start=System.currentTimeMillis();
180: for (int i=0; i<iter; i++) {
181: for (DocSet s1 : sets) {
182: for (DocSet s2 : sets) {
183: ret += s1.intersectionSize(s2);
184: }
185: }
186: }
187: long end=System.currentTimeMillis();
188: System.out.println("testIntersectionSizePerformance="+(end-start)+" ms");
189: if (ret==-1)System.out.println("wow!");
190: }
191:
192:
193: public void testExistsPerformance() {
194: loadfactor=.75f;
195: rand=new Random(12345); // make deterministic
196: int maxSetsize=4000;
197: int nSets=512;
198: int iter=1;
199: int maxDoc=1000000;
200: DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
201: int ret=0;
202: long start=System.currentTimeMillis();
203: for (int i=0; i<iter; i++) {
204: for (DocSet s1 : sets) {
205: for (int j=0; j<maxDoc; j++) {
206: ret += s1.exists(j) ? 1 :0;
207: }
208: }
209: }
210: long end=System.currentTimeMillis();
211: System.out.println("testExistsSizePerformance="+(end-start)+" ms");
212: if (ret==-1)System.out.println("wow!");
213: }
214: ***/
215:
216: /**** needs code insertion into HashDocSet
217: public void testExistsCollisions() {
218: loadfactor=.75f;
219: rand=new Random(12345); // make deterministic
220: int maxSetsize=4000;
221: int nSets=512;
222: int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
223: int ret=0;
224:
225: for (int maxDoc : maxDocs) {
226: int mask = (BitUtil.nextHighestPowerOfTwo(maxDoc)>>1)-1;
227: DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
228: int cstart = HashDocSet.collisions;
229: for (DocSet s1 : sets) {
230: for (int j=0; j<maxDocs[0]; j++) {
231: int idx = rand.nextInt()&mask;
232: ret += s1.exists(idx) ? 1 :0;
233: }
234: }
235: int cend = HashDocSet.collisions;
236: System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
237: }
238: if (ret==-1)System.out.println("wow!");
239: System.out.println("collisions="+HashDocSet.collisions);
240: }
241: ***/
242:
243: }
|