001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.util;
017:
018: /** An iterator to iterate over set bits in an OpenBitSet.
019: * This is faster than nextSetBit() for iterating over the complete set of bits,
020: * especially when the density of the bits set is high.
021: *
022: * @author yonik
023: * @version $Id$
024: */
025: public class BitSetIterator {
026:
027: // The General Idea: instead of having an array per byte that has
028: // the offsets of the next set bit, that array could be
029: // packed inside a 32 bit integer (8 4 bit numbers). That
030: // should be faster than accessing an array for each index, and
031: // the total array size is kept smaller (256*sizeof(int))=1K
032: protected final static int[] bitlist = { 0x0, 0x1, 0x2, 0x21, 0x3,
033: 0x31, 0x32, 0x321, 0x4, 0x41, 0x42, 0x421, 0x43, 0x431,
034: 0x432, 0x4321, 0x5, 0x51, 0x52, 0x521, 0x53, 0x531, 0x532,
035: 0x5321, 0x54, 0x541, 0x542, 0x5421, 0x543, 0x5431, 0x5432,
036: 0x54321, 0x6, 0x61, 0x62, 0x621, 0x63, 0x631, 0x632,
037: 0x6321, 0x64, 0x641, 0x642, 0x6421, 0x643, 0x6431, 0x6432,
038: 0x64321, 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532,
039: 0x65321, 0x654, 0x6541, 0x6542, 0x65421, 0x6543, 0x65431,
040: 0x65432, 0x654321, 0x7, 0x71, 0x72, 0x721, 0x73, 0x731,
041: 0x732, 0x7321, 0x74, 0x741, 0x742, 0x7421, 0x743, 0x7431,
042: 0x7432, 0x74321, 0x75, 0x751, 0x752, 0x7521, 0x753, 0x7531,
043: 0x7532, 0x75321, 0x754, 0x7541, 0x7542, 0x75421, 0x7543,
044: 0x75431, 0x75432, 0x754321, 0x76, 0x761, 0x762, 0x7621,
045: 0x763, 0x7631, 0x7632, 0x76321, 0x764, 0x7641, 0x7642,
046: 0x76421, 0x7643, 0x76431, 0x76432, 0x764321, 0x765, 0x7651,
047: 0x7652, 0x76521, 0x7653, 0x76531, 0x76532, 0x765321,
048: 0x7654, 0x76541, 0x76542, 0x765421, 0x76543, 0x765431,
049: 0x765432, 0x7654321, 0x8, 0x81, 0x82, 0x821, 0x83, 0x831,
050: 0x832, 0x8321, 0x84, 0x841, 0x842, 0x8421, 0x843, 0x8431,
051: 0x8432, 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853, 0x8531,
052: 0x8532, 0x85321, 0x854, 0x8541, 0x8542, 0x85421, 0x8543,
053: 0x85431, 0x85432, 0x854321, 0x86, 0x861, 0x862, 0x8621,
054: 0x863, 0x8631, 0x8632, 0x86321, 0x864, 0x8641, 0x8642,
055: 0x86421, 0x8643, 0x86431, 0x86432, 0x864321, 0x865, 0x8651,
056: 0x8652, 0x86521, 0x8653, 0x86531, 0x86532, 0x865321,
057: 0x8654, 0x86541, 0x86542, 0x865421, 0x86543, 0x865431,
058: 0x865432, 0x8654321, 0x87, 0x871, 0x872, 0x8721, 0x873,
059: 0x8731, 0x8732, 0x87321, 0x874, 0x8741, 0x8742, 0x87421,
060: 0x8743, 0x87431, 0x87432, 0x874321, 0x875, 0x8751, 0x8752,
061: 0x87521, 0x8753, 0x87531, 0x87532, 0x875321, 0x8754,
062: 0x87541, 0x87542, 0x875421, 0x87543, 0x875431, 0x875432,
063: 0x8754321, 0x876, 0x8761, 0x8762, 0x87621, 0x8763, 0x87631,
064: 0x87632, 0x876321, 0x8764, 0x87641, 0x87642, 0x876421,
065: 0x87643, 0x876431, 0x876432, 0x8764321, 0x8765, 0x87651,
066: 0x87652, 0x876521, 0x87653, 0x876531, 0x876532, 0x8765321,
067: 0x87654, 0x876541, 0x876542, 0x8765421, 0x876543,
068: 0x8765431, 0x8765432, 0x87654321 };
069: /***** the python code that generated bitlist
070: def bits2int(val):
071: arr=0
072: for shift in range(8,0,-1):
073: if val & 0x80:
074: arr = (arr << 4) | shift
075: val = val << 1
076: return arr
077:
078: def int_table():
079: tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
080: return ','.join(tbl)
081: ******/
082:
083: // hmmm, what about an iterator that finds zeros though,
084: // or a reverse iterator... should they be separate classes
085: // for efficiency, or have a common root interface? (or
086: // maybe both? could ask for a SetBitsIterator, etc...
087:
088: private final long[] arr;
089: private final int words;
090: private int i = -1;
091: private long word;
092: private int wordShift;
093: private int indexArray;
094:
095: public BitSetIterator(OpenBitSet obs) {
096: this (obs.getBits(), obs.getNumWords());
097: }
098:
099: public BitSetIterator(long[] bits, int numWords) {
100: arr = bits;
101: words = numWords;
102: }
103:
104: // 64 bit shifts
105: private void shift() {
106: if ((int) word == 0) {
107: wordShift += 32;
108: word = word >>> 32;
109: }
110: if ((word & 0x0000FFFF) == 0) {
111: wordShift += 16;
112: word >>>= 16;
113: }
114: if ((word & 0x000000FF) == 0) {
115: wordShift += 8;
116: word >>>= 8;
117: }
118: indexArray = bitlist[(int) word & 0xff];
119: }
120:
121: /***** alternate shift implementations
122: // 32 bit shifts, but a long shift needed at the end
123: private void shift2() {
124: int y = (int)word;
125: if (y==0) {wordShift +=32; y = (int)(word >>>32); }
126: if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
127: if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
128: indexArray = bitlist[y & 0xff];
129: word >>>= (wordShift +1);
130: }
131:
132: private void shift3() {
133: int lower = (int)word;
134: int lowByte = lower & 0xff;
135: if (lowByte != 0) {
136: indexArray=bitlist[lowByte];
137: return;
138: }
139: shift();
140: }
141: ******/
142:
143: public int next() {
144: if (indexArray == 0) {
145: if (word != 0) {
146: word >>>= 8;
147: wordShift += 8;
148: }
149:
150: while (word == 0) {
151: if (++i >= words)
152: return -1;
153: word = arr[i];
154: wordShift = -1; // loop invariant code motion should move this
155: }
156:
157: // after the first time, should I go with a linear search, or
158: // stick with the binary search in shift?
159: shift();
160: }
161:
162: int bitIndex = (indexArray & 0x0f) + wordShift;
163: indexArray >>>= 4;
164: // should i<<6 be cached as a separate variable?
165: // it would only save one cycle in the best circumstances.
166: return (i << 6) + bitIndex;
167: }
168:
169: public int next(int fromIndex) {
170: indexArray = 0;
171: i = fromIndex >> 6;
172: if (i >= words) {
173: word = 0; // setup so next() will also return -1
174: return -1;
175: }
176: wordShift = fromIndex & 0x3f;
177: word = arr[i] >>> wordShift;
178: if (word != 0) {
179: wordShift--; // compensate for 1 based arrIndex
180: } else {
181: while (word == 0) {
182: if (++i >= words)
183: return -1;
184: word = arr[i];
185: }
186: wordShift = -1;
187: }
188:
189: shift();
190:
191: int bitIndex = (indexArray & 0x0f) + wordShift;
192: indexArray >>>= 4;
193: // should i<<6 be cached as a separate variable?
194: // it would only save one cycle in the best circumstances.
195: return (i << 6) + bitIndex;
196: }
197:
198: }
|