001: package it.unimi.dsi.fastutil;
002:
003: /*
004: * fastutil: Fast & compact type-specific collections for Java
005: *
006: * Copyright (C) 2002-2008 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or
009: * modify it under the terms of the GNU Lesser General Public
010: * License as published by the Free Software Foundation; either
011: * version 2.1 of the License, or (at your option) any later version.
012: *
013: * This library is distributed in the hope that it will be useful,
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * Lesser General Public License for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public
019: * License along with this library; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
021: *
022: */
023:
024: /** Basic data for all hash-based classes.
025: *
026: * <p>The classes in <code>fastutil</code> are built around open-addressing hashing
027: * implemented <em>via</em> double hashing. Following Knuth's suggestions in the third volume of <em>The Art of Computer
028: * Programming</em>, we use for the table size a prime <var>p</var> such that
029: * <var>p</var>-2 is also prime. In this way hashing is implemented with modulo <var>p</var>,
030: * and secondary hashing with modulo <var>p</var>-2.
031: *
032: * <p>Entries in a table can be in three states: {@link #FREE}, {@link #OCCUPIED} or {@link #REMOVED}.
033: * The naive handling of removed entries requires that you search for a free entry as if they were occupied. However,
034: * <code>fastutil</code> implements two useful optimizations, based on the following invariant:
035: * <blockquote>
036: * Let <var>i</var><sub>0</sub>, <var>i</var><sub>1</sub>, &hellip, <var>i</var><sub><var>p</var>-1</sub> be
037: * the permutation of the table indices induced by the key <var>k</var>, that is, <var>i</var><sub>0</sub> is the hash
038: * of <var>k</var> and the following indices are obtained by adding (modulo <var>p</var>) the secondary hash plus one.
039: * If there is a {@link #OCCUPIED} entry with key <var>k</var>, its index in the sequence above comes <em>before</em>
040: * the indices of any {@link #REMOVED} entries with key <var>k</var>.
041: * </blockquote>
042: *
043: * <p>When we search for the key <var>k</var> we scan the entries in the
044: * sequence <var>i</var><sub>0</sub>, <var>i</var><sub>1</sub>, &hellip,
045: * <var>i</var><sub><var>p</var>-1</sub> and stop when <var>k</var> is found,
046: * when we finished the sequence or when we find a {@link #FREE} entry. Note
047: * that the correctness of this procedure it is not completely trivial. Indeed,
048: * when we stop at a {@link #REMOVED} entry with key <var>k</var> we must rely
049: * on the invariant to be sure that no {@link #OCCUPIED} entry with the same
050: * key can appear later. If we insert and remove frequently the same entries,
051: * this optimization can be very effective (note, however, that when using
052: * objects as keys or values deleted entries are set to a special fixed value to
053: * optimize garbage collection).
054: *
055: * <p>Moreover, during the probe we keep the index of the first {@link #REMOVED} entry we meet.
056: * If we actually have to insert a new element, we use that
057: * entry if we can, thus avoiding to pollute another {@link #FREE} entry. Since this position comes
058: * <i>a fortiori</i> before any {@link #REMOVED} entries with the same key, we are also keeping the invariant true.
059: */
060:
061: public interface Hash {
062:
063: /** The initial default size of a hash table. */
064: final public int DEFAULT_INITIAL_SIZE = 16;
065: /** The default load factor of a hash table. */
066: final public float DEFAULT_LOAD_FACTOR = .75f;
067: /** The load factor for a (usually small) table that is meant to be particularly fast. */
068: final public float FAST_LOAD_FACTOR = .5f;
069: /** The load factor for a (usually very small) table that is meant to be extremely fast. */
070: final public float VERY_FAST_LOAD_FACTOR = .25f;
071: /** The default growth factor of a hash table. */
072: final public int DEFAULT_GROWTH_FACTOR = 16;
073: /** The state of a free hash table entry. */
074: final public byte FREE = 0;
075: /** The state of a occupied hash table entry. */
076: final public byte OCCUPIED = -1;
077: /** The state of a hash table entry freed by a deletion. */
078: final public byte REMOVED = 1;
079:
080: /** A list of primes to be used as table sizes. The <var>i</var>-th element is
081: * the largest prime <var>p</var> smaller than 2<sup>(<var>i</var>+28)/16</sup>
082: * and such that <var>p</var>-2 is also prime (or 1, for the first few entries). */
083:
084: final public int PRIMES[] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5,
085: 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 13, 13, 13,
086: 13, 13, 13, 13, 13, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
087: 19, 19, 31, 31, 31, 31, 31, 31, 31, 43, 43, 43, 43, 43, 43,
088: 43, 43, 61, 61, 61, 61, 61, 73, 73, 73, 73, 73, 73, 73,
089: 103, 103, 109, 109, 109, 109, 109, 139, 139, 151, 151, 151,
090: 151, 181, 181, 193, 199, 199, 199, 229, 241, 241, 241, 271,
091: 283, 283, 313, 313, 313, 349, 349, 349, 349, 421, 433, 463,
092: 463, 463, 523, 523, 571, 601, 619, 661, 661, 661, 661, 661,
093: 823, 859, 883, 883, 883, 1021, 1063, 1093, 1153, 1153,
094: 1231, 1321, 1321, 1429, 1489, 1489, 1621, 1699, 1789, 1873,
095: 1951, 2029, 2131, 2143, 2311, 2383, 2383, 2593, 2731, 2803,
096: 3001, 3121, 3259, 3391, 3583, 3673, 3919, 4093, 4273, 4423,
097: 4651, 4801, 5023, 5281, 5521, 5743, 5881, 6301, 6571, 6871,
098: 7129, 7489, 7759, 8089, 8539, 8863, 9283, 9721, 10141,
099: 10531, 11071, 11551, 12073, 12613, 13009, 13759, 14323,
100: 14869, 15649, 16363, 17029, 17839, 18541, 19471, 20233,
101: 21193, 22159, 23059, 24181, 25171, 26263, 27541, 28753,
102: 30013, 31321, 32719, 34213, 35731, 37309, 38923, 40639,
103: 42463, 44281, 46309, 48313, 50461, 52711, 55051, 57529,
104: 60091, 62299, 65521, 68281, 71413, 74611, 77713, 81373,
105: 84979, 88663, 92671, 96739, 100801, 105529, 109849, 115021,
106: 120079, 125509, 131011, 136861, 142873, 149251, 155863,
107: 162751, 169891, 177433, 185071, 193381, 202129, 211063,
108: 220021, 229981, 240349, 250969, 262111, 273643, 285841,
109: 298411, 311713, 325543, 339841, 355009, 370663, 386989,
110: 404269, 422113, 440809, 460081, 480463, 501829, 524221,
111: 547399, 571603, 596929, 623353, 651019, 679909, 709741,
112: 741343, 774133, 808441, 844201, 881539, 920743, 961531,
113: 1004119, 1048573, 1094923, 1143283, 1193911, 1246963,
114: 1302181, 1359733, 1420039, 1482853, 1548541, 1616899,
115: 1688413, 1763431, 1841293, 1922773, 2008081, 2097133,
116: 2189989, 2286883, 2388163, 2493853, 2604013, 2719669,
117: 2840041, 2965603, 3097123, 3234241, 3377191, 3526933,
118: 3682363, 3845983, 4016041, 4193803, 4379719, 4573873,
119: 4776223, 4987891, 5208523, 5439223, 5680153, 5931313,
120: 6194191, 6468463, 6754879, 7053331, 7366069, 7692343,
121: 8032639, 8388451, 8759953, 9147661, 9552733, 9975193,
122: 10417291, 10878619, 11360203, 11863153, 12387841, 12936529,
123: 13509343, 14107801, 14732413, 15384673, 16065559, 16777141,
124: 17519893, 18295633, 19105483, 19951231, 20834689, 21757291,
125: 22720591, 23726449, 24776953, 25873963, 27018853, 28215619,
126: 29464579, 30769093, 32131711, 33554011, 35039911, 36591211,
127: 38211163, 39903121, 41669479, 43514521, 45441199, 47452879,
128: 49553941, 51747991, 54039079, 56431513, 58930021, 61539091,
129: 64263571, 67108669, 70079959, 73182409, 76422793, 79806229,
130: 83339383, 87029053, 90881083, 94906249, 99108043,
131: 103495879, 108077731, 112863013, 117860053, 123078019,
132: 128526943, 134217439, 140159911, 146365159, 152845393,
133: 159612601, 166679173, 174058849, 181765093, 189812341,
134: 198216103, 206991601, 216156043, 225726379, 235720159,
135: 246156271, 257054491, 268435009, 280319203, 292730833,
136: 305691181, 319225021, 333358513, 348117151, 363529759,
137: 379624279, 396432481, 413983771, 432312511, 451452613,
138: 471440161, 492312523, 514109251, 536870839, 560640001,
139: 585461743, 611382451, 638450569, 666717199, 696235363,
140: 727060069, 759249643, 792864871, 827967631, 864625033,
141: 902905501, 942880663, 984625531, 1028218189, 1073741719,
142: 1121280091, 1170923713, 1222764841, 1276901371, 1333434301,
143: 1392470281, 1454120779, 1518500173, 1585729993, 1655935399,
144: 1729249999, 1805811253, 1885761133, 1969251079, 2056437379,
145: 2147482951 };
146:
147: /** A generic hash strategy.
148: *
149: * <P>Custom hash structures (e.g., {@link
150: * it.unimi.dsi.fastutil.objects.ObjectOpenCustomHashSet}) allow to hash objects
151: * using arbitrary functions, a typical example being that of {@linkplain
152: * it.unimi.dsi.fastutil.ints.IntArrays#HASH_STRATEGY arrays}. Of course,
153: * one has to compare objects for equality consistently with the chosen
154: * function. A <em>hash strategy</em>, thus, specifies an {@linkplain
155: * #equals(Object,Object) equality method} and a {@linkplain
156: * #hashCode(Object) hash function}, with the obvious property that
157: * equal objects must have the same hash code.
158: *
159: * <P>If your custom collection must be able to contain <code>null</code>,
160: * then your strategy must be able to handle <code>null</code>, too.
161: */
162:
163: public interface Strategy<K> {
164:
165: /** Returns the hash code of the specified object with respect to this hash strategy.
166: *
167: * @param o an object (or <code>null</code>).
168: * @return the hash code of the given object with respect to this hash strategy.
169: */
170:
171: public int hashCode(K o);
172:
173: /** Returns true if the given objects are equal with respect to this hash strategy.
174: *
175: * @param a an object (or <code>null</code>).
176: * @param b another object (or <code>null</code>).
177: * @return true if the two specified objects are equal with respect to this hash strategy.
178: */
179: public boolean equals(K a, K b);
180: }
181:
182: }
|