01: /**
02: * Licensed to the Apache Software Foundation (ASF) under one or more
03: * contributor license agreements. See the NOTICE file distributed with
04: * this work for additional information regarding copyright ownership.
05: * The ASF licenses this file to You under the Apache License, Version 2.0
06: * (the "License"); you may not use this file except in compliance with
07: * the License. You may obtain a copy of the License at
08: *
09: * http://www.apache.org/licenses/LICENSE-2.0
10: *
11: * Unless required by applicable law or agreed to in writing, software
12: * distributed under the License is distributed on an "AS IS" BASIS,
13: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14: * See the License for the specific language governing permissions and
15: * limitations under the License.
16: */package org.apache.solr.search;
17:
18: import org.apache.lucene.search.HitCollector;
19: import org.apache.solr.util.OpenBitSet;
20: import org.apache.solr.core.SolrConfig;
21:
22: /**
23: * @author yonik
24: * @version $Id$
25: */
26:
27: final class DocSetHitCollector extends HitCollector {
28:
29: static float HASHSET_INVERSE_LOAD_FACTOR = 1.0f / SolrConfig.config
30: .getFloat("//HashDocSet/@loadFactor", 0.75f);
31: static int HASHDOCSET_MAXSIZE = SolrConfig.config.getInt(
32: "//HashDocSet/@maxSize", -1);
33:
34: int pos = 0;
35: OpenBitSet bits;
36: final int maxDoc;
37:
38: // in case there aren't that many hits, we may not want a very sparse
39: // bit array. Optimistically collect the first few docs in an array
40: // in case there are only a few.
41: final int[] scratch = new int[HASHDOCSET_MAXSIZE];
42:
43: // todo - could pass in bitset and an operation also...
44: DocSetHitCollector(int maxDoc) {
45: this .maxDoc = maxDoc;
46: }
47:
48: public void collect(int doc, float score) {
49: // optimistically collect the first docs in an array
50: // in case the total number will be small enough to represent
51: // as a HashDocSet() instead...
52: // Storing in this array will be quicker to convert
53: // than scanning through a potentially huge bit vector.
54: // FUTURE: when search methods all start returning docs in order, maybe
55: // we could have a ListDocSet() and use the collected array directly.
56: if (pos < scratch.length) {
57: scratch[pos] = doc;
58: } else {
59: // this conditional could be removed if BitSet was preallocated, but that
60: // would take up more memory, and add more GC time...
61: if (bits == null)
62: bits = new OpenBitSet(maxDoc);
63: bits.fastSet(doc);
64: }
65:
66: pos++;
67: }
68:
69: public DocSet getDocSet() {
70: if (pos <= scratch.length) {
71: return new HashDocSet(scratch, 0, pos,
72: HASHSET_INVERSE_LOAD_FACTOR);
73: } else {
74: // set the bits for ids that were collected in the array
75: for (int i = 0; i < scratch.length; i++)
76: bits.fastSet(scratch[i]);
77: return new BitDocSet(bits, pos);
78: }
79: }
80: }
|