001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.search;
017:
018: import org.apache.solr.core.SolrException;
019: import org.apache.solr.util.OpenBitSet;
020:
021: import java.util.BitSet;
022:
023: /**
024: * <code>DocSet</code> represents an unordered set of Lucene Document Ids.
025: *
026: * <p>
027: * WARNING: Any DocSet returned from SolrIndexSearcher should <b>not</b> be modified as it may have been retrieved from
028: * a cache and could be shared.
029: * </p>
030: *
031: * @author yonik
032: * @version $Id: DocSet.java 542679 2007-05-29 22:28:21Z ryan $
033: * @since solr 0.9
034: */
035: public interface DocSet /* extends Collection<Integer> */{
036:
037: /**
038: * Adds the specified document if it is not currently in the DocSet
039: * (optional operation).
040: *
041: * @see #addUnique
042: * @throws SolrException if the implementation does not allow modifications
043: */
044: public void add(int doc);
045:
046: /**
047: * Adds a document the caller knows is not currently in the DocSet
048: * (optional operation).
049: *
050: * <p>
051: * This method may be faster then <code>add(doc)</code> in some
052: * implementaions provided the caller is certain of the precondition.
053: * </p>
054: *
055: * @see #add
056: * @throws SolrException if the implementation does not allow modifications
057: */
058: public void addUnique(int doc);
059:
060: /**
061: * Returns the number of documents in the set.
062: */
063: public int size();
064:
065: /**
066: * Returns true if a document is in the DocSet.
067: */
068: public boolean exists(int docid);
069:
070: /**
071: * Returns an iterator that may be used to iterate over all of the documents in the set.
072: *
073: * <p>
074: * The order of the documents returned by this iterator is
075: * non-deterministic, and any scoring information is meaningless
076: * </p>
077: */
078: public DocIterator iterator();
079:
080: /**
081: * Returns a BitSet view of the DocSet. Any changes to this BitSet <b>may</b>
082: * be reflected in the DocSet, hence if the DocSet is shared or was returned from
083: * a SolrIndexSearcher method, it's not safe to modify the BitSet.
084: *
085: * @return
086: * An OpenBitSet with the bit number of every docid set in the set.
087: */
088: @Deprecated
089: public OpenBitSet getBits();
090:
091: /**
092: * Returns the approximate amount of memory taken by this DocSet.
093: * This is only an approximation and doesn't take into account java object overhead.
094: *
095: * @return
096: * the approximate memory consumption in bytes
097: */
098: public long memSize();
099:
100: /**
101: * Returns the intersection of this set with another set. Neither set is modified - a new DocSet is
102: * created and returned.
103: * @return a DocSet representing the intersection
104: */
105: public DocSet intersection(DocSet other);
106:
107: /**
108: * Returns the number of documents of the intersection of this set with another set.
109: * May be more efficient than actually creating the intersection and then getting it's size.
110: */
111: public int intersectionSize(DocSet other);
112:
113: /**
114: * Returns the union of this set with another set. Neither set is modified - a new DocSet is
115: * created and returned.
116: * @return a DocSet representing the union
117: */
118: public DocSet union(DocSet other);
119:
120: /**
121: * Returns the number of documents of the union of this set with another set.
122: * May be more efficient than actually creating the union and then getting it's size.
123: */
124: public int unionSize(DocSet other);
125:
126: /**
127: * Returns the documents in this set that are not in the other set. Neither set is modified - a new DocSet is
128: * created and returned.
129: * @return a DocSet representing this AND NOT other
130: */
131: public DocSet andNot(DocSet other);
132:
133: /**
134: * Returns the number of documents in this set that are not in the other set.
135: */
136: public int andNotSize(DocSet other);
137: }
138:
139: /** A base class that may be usefull for implementing DocSets */
140: abstract class DocSetBase implements DocSet {
141:
142: // Not implemented efficiently... for testing purposes only
143: public boolean equals(Object obj) {
144: if (!(obj instanceof DocSet))
145: return false;
146: DocSet other = (DocSet) obj;
147: if (this .size() != other.size())
148: return false;
149:
150: if (this instanceof DocList && other instanceof DocList) {
151: // compare ordering
152: DocIterator i1 = this .iterator();
153: DocIterator i2 = this .iterator();
154: while (i1.hasNext() && i2.hasNext()) {
155: if (i1.nextDoc() != i2.nextDoc())
156: return false;
157: }
158: return true;
159: // don't compare matches
160: }
161:
162: // if (this.size() != other.size()) return false;
163: return this .getBits().equals(other.getBits());
164: }
165:
166: /**
167: * @throws SolrException Base implementation does not allow modifications
168: */
169: public void add(int doc) {
170: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
171: "Unsupported Operation");
172: }
173:
174: /**
175: * @throws SolrException Base implementation does not allow modifications
176: */
177: public void addUnique(int doc) {
178: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
179: "Unsupported Operation");
180: }
181:
182: /**
183: * Inefficient base implementation.
184: *
185: * @see BitDocSet#getBits
186: */
187: public OpenBitSet getBits() {
188: OpenBitSet bits = new OpenBitSet();
189: for (DocIterator iter = iterator(); iter.hasNext();) {
190: bits.set(iter.nextDoc());
191: }
192: return bits;
193: };
194:
195: public DocSet intersection(DocSet other) {
196: // intersection is overloaded in HashDocSet to be more
197: // efficient, so if "other" is a HashDocSet, dispatch off
198: // of it instead.
199: if (other instanceof HashDocSet) {
200: return other.intersection(this );
201: }
202:
203: // Default... handle with bitsets.
204: OpenBitSet newbits = (OpenBitSet) (this .getBits().clone());
205: newbits.and(other.getBits());
206: return new BitDocSet(newbits);
207: }
208:
209: public DocSet union(DocSet other) {
210: OpenBitSet newbits = (OpenBitSet) (this .getBits().clone());
211: newbits.or(other.getBits());
212: return new BitDocSet(newbits);
213: }
214:
215: public int intersectionSize(DocSet other) {
216: // intersectionSize is overloaded in HashDocSet to be more
217: // efficient, so if "other" is a HashDocSet, dispatch off
218: // of it instead.
219: if (other instanceof HashDocSet) {
220: return other.intersectionSize(this );
221: }
222: // less efficient way: do the intersection then get it's size
223: return intersection(other).size();
224: }
225:
226: public int unionSize(DocSet other) {
227: return this .size() + other.size()
228: - this .intersectionSize(other);
229: }
230:
231: public DocSet andNot(DocSet other) {
232: OpenBitSet newbits = (OpenBitSet) (this .getBits().clone());
233: newbits.andNot(other.getBits());
234: return new BitDocSet(newbits);
235: }
236:
237: public int andNotSize(DocSet other) {
238: return this.size() - this.intersectionSize(other);
239: }
240: }
|