//******************************
// Written by Peter Golde
// Copyright (c) 2004-2005, Wintellect
//
// Use and restribution of this code is subject to the license agreement
// contained in the file "License.txt" accompanying this file.
//******************************
using System;
using System.Diagnostics;
using System.Collections.Generic;
using System.Runtime.Serialization;
namespace Wintellect.PowerCollections{
/// <summary>
/// The base implementation for various collections classes that use hash tables
/// as part of their implementation. This class should not (and can not) be
/// used directly by end users; it's only for internal use by the collections package. The Hash
/// does not handle duplicate values.
/// </summary>
/// <remarks>
/// The Hash manages items of type T, and uses a IComparer<ItemTYpe> that
/// hashes compares items to hash items into the table.
///</remarks>
[Serializable]
internal class Hash<T> : IEnumerable<T>, ISerializable, IDeserializationCallback
{
// NOTE: If you add new member variables, you very well may need to change the serialization
// code to serialize that member.
private IEqualityComparer<T> equalityComparer; // interface for comparing elements
private int count; // The count of elements in the table.
private int usedSlots; // Includes real elements and deleted elements with the collision bit on. Used to determine
// when we need to resize.
private int totalSlots; // Size of the table. Always a power of two.
private float loadFactor; // maximal load factor for the table.
private int thresholdGrow; // floor(totalSlots * loadFactor);
private int thresholdShrink; // thresholdGrow / 3.
private int hashMask; // Mask to convert hash values to the size of the table.
private int secondaryShift; // Shift to get the secondary skip value.
private Slot[] table; // The hash table.
private int changeStamp; // An integer that is changed every time the table structurally changes.
// Used so that enumerations throw an exception if the tree is changed
// during enumeration.
private const int MINSIZE = 16; // minimum number of slots.
private SerializationInfo serializationInfo; // Info used during deserialization.
/// <summary>
/// The structure that has each slot in the hash table. Each slot has three parts:
/// 1. The collision bit. Indicates whether some item visited this slot but had to
/// keep looking because the slot was full.
/// 2. 31-bit full hash value of the item. If zero, the slot is empty.
/// 3. The item itself.
/// </summary>
struct Slot
{
private uint hash_collision; // Lower 31 bits: the hash value. Top bit: the collision bit.
public T item; // The item.
/// <summary>
/// The full hash value associated with the value in this slot, or zero
/// if the slot is empty.
/// </summary>
public int HashValue
{
get {
return (int) (hash_collision & 0x7FFFFFFF);
}
set {
Debug.Assert((value & 0x80000000) == 0); // make sure sign bit isn't set.
hash_collision = (uint)value | (hash_collision & 0x80000000);
}
}
/// <summary>
/// Is this slot empty?
/// </summary>
public bool Empty {
get {
return HashValue == 0;
}
}
/// <summary>
/// Clear this slot, leaving the collision bit alone.
/// </summary>
public void Clear() {
HashValue = 0;
item = default(T); // Done to avoid keeping things alive that shouldn't be.
}
/// <summary>
/// The "Collision" bit indicates that some value hit this slot and
/// collided, so had to try another slot.
/// </summary>
public bool Collision
{
get
{
return (hash_collision & 0x80000000) != 0;
}
set
{
if (value)
hash_collision |= 0x80000000;
else
hash_collision &= 0x7FFFFFFF;
}
}
}
/// <summary>
/// Constructor. Create a new hash table.
/// </summary>
/// <param name="equalityComparer">The comparer to use to compare items. </param>
public Hash(IEqualityComparer<T> equalityComparer)
{
this.equalityComparer = equalityComparer;
this.loadFactor = 0.70F; // default load factor.
}
/// <summary>
/// Gets the current enumeration stamp. Call CheckEnumerationStamp later
/// with this value to throw an exception if the hash table is changed.
/// </summary>
/// <returns>The current enumeration stamp.</returns>
internal int GetEnumerationStamp()
{
return changeStamp;
}
/// <summary>
/// Must be called whenever there is a structural change in the tree. Causes
/// changeStamp to be changed, which causes any in-progress enumerations
/// to throw exceptions.
/// </summary>
internal void StopEnumerations()
{
++changeStamp;
}
/// <summary>
/// Checks the given stamp against the current change stamp. If different, the
/// collection has changed during enumeration and an InvalidOperationException
/// must be thrown
/// </summary>
/// <param name="startStamp">changeStamp at the start of the enumeration.</param>
internal void CheckEnumerationStamp(int startStamp)
{
if (startStamp != changeStamp) {
throw new InvalidOperationException(Strings.ChangeDuringEnumeration);
}
}
/// <summary>
/// Gets the full hash code for an item.
/// </summary>
/// <param name="item">Item to get hash code for.</param>
/// <returns>The full hash code. It is never zero.</returns>
private int GetFullHash(T item)
{
uint hash;
hash = (uint)Util.GetHashCode(item, equalityComparer);
// The .NET framework tends to produce pretty bad hash codes.
// Scramble them up to be much more random!
hash += ~(hash << 15);
hash ^= (hash >> 10);
hash += (hash << 3);
hash ^= (hash >> 6);
hash += ~(hash << 11);
hash ^= (hash >> 16);
hash &= 0x7FFFFFFF;
if (hash == 0)
hash = 0x7FFFFFFF; // Make sure it isn't zero.
return (int)hash;
}
/// <summary>
/// Get the initial bucket number and skip amount from the full hash value.
/// </summary>
/// <param name="hash">The full hash value.</param>
/// <param name="initialBucket">Returns the initial bucket. Always in the range 0..(totalSlots - 1).</param>
/// <param name="skip">Returns the skip values. Always odd in the range 0..(totalSlots - 1).</param>
private void GetHashValuesFromFullHash(int hash, out int initialBucket, out int skip)
{
initialBucket = hash & hashMask;
// The skip value must be relatively prime to the table size. Since the table size is a
// power of two, any odd number is relatively prime, so oring in 1 will do it.
skip = ((hash >> secondaryShift) & hashMask) | 1;
}
/// <summary>
/// Gets the full hash value, initial bucket number, and skip amount for an item.
/// </summary>
/// <param name="item">Item to get hash value of.</param>
/// <param name="initialBucket">Returns the initial bucket. Always in the range 0..(totalSlots - 1).</param>
/// <param name="skip">Returns the skip values. Always odd in the range 0..(totalSlots - 1).</param>
/// <returns>The full hash value. This is never zero.</returns>
private int GetHashValues(T item, out int initialBucket, out int skip)
{
int hash = GetFullHash(item);
GetHashValuesFromFullHash(hash, out initialBucket, out skip);
return hash;
}
/// <summary>
/// Make sure there are enough slots in the hash table that <paramref name="additionalItems"/>
/// items can be inserted into the table.
/// </summary>
/// <param name="additionalItems">Number of additional items we are inserting.</param>
private void EnsureEnoughSlots(int additionalItems)
{
StopEnumerations();
if (usedSlots + additionalItems > thresholdGrow) {
// We need to expand the table. Figure out to what size.
int newSize;
newSize = Math.Max(totalSlots, MINSIZE);
while ((int)(newSize * loadFactor) < usedSlots + additionalItems) {
newSize *= 2;
if (newSize <= 0) {
// Must have overflowed the size of an int. Hard to believe we didn't run out of memory first.
throw new InvalidOperationException(Strings.CollectionTooLarge);
}
}
ResizeTable(newSize);
}
}
/// <summary>
/// Check if the number of items in the table is small enough that
/// we should shrink the table again.
/// </summary>
private void ShrinkIfNeeded()
{
if (count < thresholdShrink) {
int newSize;
if (count > 0) {
newSize = MINSIZE;
while ((int)(newSize * loadFactor) < count)
newSize *= 2;
}
else {
// We've removed all the elements. Shrink to zero.
newSize = 0;
}
ResizeTable(newSize);
}
}
/// <summary>
/// Given the size of a hash table, compute the "secondary shift" value -- the shift
/// that is used to determine the skip amount for collision resolution.
/// </summary>
/// <param name="newSize">The new size of the table.</param>
/// <returns>The secondary skip amount.</returns>
private int GetSecondaryShift(int newSize)
{
int x = newSize - 2; // x is of the form 0000111110 -- a single string of 1's followed by a single zero.
int secondaryShift = 0;
// Keep shifting x until it is the set of bits we want to extract: it be the highest bits possible,
// but can't overflow into the sign bit.
while ((x & 0x40000000) == 0) {
x <<= 1;
++secondaryShift;
}
return secondaryShift;
}
/// <summary>
/// Resize the hash table to the given new size, moving all items into the
/// new hash table.
/// </summary>
/// <param name="newSize">The new size of the hash table. Must be a power
/// of two.</param>
private void ResizeTable(int newSize)
{
Slot[] oldTable = table; // Move all the items from this table to the new table.
Debug.Assert((newSize & (newSize - 1)) == 0); // Check newSize is a power of two.
totalSlots = newSize;
thresholdGrow = (int)(totalSlots * loadFactor);
thresholdShrink = thresholdGrow / 3;
if (thresholdShrink <= MINSIZE)
thresholdShrink = 1;
hashMask = newSize - 1;
secondaryShift = GetSecondaryShift(newSize);
if (totalSlots > 0)
table = new Slot[totalSlots];
else
table = null;
if (oldTable != null && table != null) {
foreach (Slot oldSlot in oldTable) {
int hash, bucket, skip;
hash = oldSlot.HashValue;
GetHashValuesFromFullHash(hash, out bucket, out skip);
// Find an empty bucket.
while (! table[bucket].Empty) {
// The slot is used, but isn't our item. Set the collision bit and keep looking.
table[bucket].Collision = true;
bucket = (bucket + skip) & hashMask;
}
// We found an empty bucket.
table[bucket].HashValue = hash;
table[bucket].item = oldSlot.item;
}
}
usedSlots = count; // no deleted elements have the collision bit on now.
}
/// <summary>
/// Get the number of items in the hash table.
/// </summary>
/// <value>The number of items stored in the hash table.</value>
public int ElementCount
{
get
{
return count;
}
}
/// <summary>
/// Get the number of slots in the hash table. Exposed internally
/// for testing purposes.
/// </summary>
/// <value>The number of slots in the hash table.</value>
internal int SlotCount
{
get
{
return totalSlots;
}
}
/// <summary>
/// Get or change the load factor. Changing the load factor may cause
/// the size of the table to grow or shrink accordingly.
/// </summary>
/// <value></value>
public float LoadFactor
{
get
{
return loadFactor;
}
set
{
// Don't allow hopelessly inefficient load factors.
if (value < 0.25 || value > 0.95)
throw new ArgumentOutOfRangeException("value", value, Strings.InvalidLoadFactor);
StopEnumerations();
bool maybeExpand = value < loadFactor; // May need to expand or shrink the table -- which?
// Update loadFactor and thresholds.
loadFactor = value;
thresholdGrow = (int)(totalSlots * loadFactor);
thresholdShrink = thresholdGrow / 3;
if (thresholdShrink <= MINSIZE)
thresholdShrink = 1;
// Possibly expand or shrink the table.
if (maybeExpand)
EnsureEnoughSlots(0);
else
ShrinkIfNeeded();
}
}
/// <summary>
/// Insert a new item into the hash table. If a duplicate item exists, can replace or
/// do nothing.
/// </summary>
/// <param name="item">The item to insert.</param>
/// <param name="replaceOnDuplicate">If true, duplicate items are replaced. If false, nothing
/// is done if a duplicate already exists.</param>
/// <param name="previous">If a duplicate was found, returns it (whether replaced or not).</param>
/// <returns>True if no duplicate existed, false if a duplicate was found.</returns>
public bool Insert(T item, bool replaceOnDuplicate, out T previous)
{
int hash, bucket, skip;
int emptyBucket = -1; // If >= 0, an empty bucket we can use for a true insert
bool duplicateMightExist = true; // If true, still the possibility that a duplicate exists.
EnsureEnoughSlots(1); // Ensure enough room to insert. Also stops enumerations.
hash = GetHashValues(item, out bucket, out skip);
for (;;) {
if (table[bucket].Empty) {
// Record the location of the first empty bucket seen. This is where the item will
// go if no duplicate exists.
if (emptyBucket == -1)
emptyBucket = bucket;
if (!duplicateMightExist || !table[bucket].Collision) {
// There can't be a duplicate further on, because a bucket with the collision bit
// clear was found (here or earlier). We have the place to insert.
break;
}
}
else if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, item)) {
// We found a duplicate item. Replace it if requested to.
previous = table[bucket].item;
if (replaceOnDuplicate)
table[bucket].item = item;
return false;
}
else {
// The slot is used, but isn't our item.
if (!table[bucket].Collision) {
// Since the collision bit is off, we can't have a duplicate.
if (emptyBucket >= 0) {
// We already have an empty bucket to use.
break;
}
else {
// Keep searching for an empty bucket to place the item.
table[bucket].Collision = true;
duplicateMightExist = false;
}
}
}
bucket = (bucket + skip) & hashMask;
}
// We found an empty bucket. Insert the new item.
table[emptyBucket].HashValue = hash;
table[emptyBucket].item = item;
++count;
if (!table[emptyBucket].Collision)
++usedSlots;
previous = default(T);
return true;
}
/// <summary>
/// Deletes an item from the hash table.
/// </summary>
/// <param name="item">Item to search for and delete.</param>
/// <param name="itemDeleted">If true returned, the actual item stored in the hash table (must be
/// equal to <paramref name="item"/>, but may not be identical.</param>
/// <returns>True if item was found and deleted, false if item wasn't found.</returns>
public bool Delete(T item, out T itemDeleted)
{
int hash, bucket, skip;
StopEnumerations();
if (count == 0) {
itemDeleted = default(T);
return false;
}
hash = GetHashValues(item, out bucket, out skip);
for (; ; ) {
if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, item)) {
// Found the item. Remove it.
itemDeleted = table[bucket].item;
table[bucket].Clear();
--count;
if (!table[bucket].Collision)
--usedSlots;
ShrinkIfNeeded();
return true;
}
else if (!table[bucket].Collision) {
// No collision bit, so we can stop searching. No such element.
itemDeleted = default(T);
return false;
}
bucket = (bucket + skip) & hashMask;
}
}
/// <summary>
/// Find an item in the hash table. If found, optionally replace it with the
/// finding item.
/// </summary>
/// <param name="find">Item to find.</param>
/// <param name="replace">If true, replaces the equal item in the hash table
/// with <paramref name="item"/>.</param>
/// <param name="item">Returns the equal item found in the table, if true was returned.</param>
/// <returns>True if the item was found, false otherwise.</returns>
public bool Find(T find, bool replace, out T item)
{
int hash, bucket, skip;
if (count == 0) {
item = default(T);
return false;
}
hash = GetHashValues(find, out bucket, out skip);
for (; ; ) {
if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, find)) {
// Found the item.
item = table[bucket].item;
if (replace)
table[bucket].item = find;
return true;
}
else if (!table[bucket].Collision) {
// No collision bit, so we can stop searching. No such element.
item = default(T);
return false;
}
bucket = (bucket + skip) & hashMask;
}
}
/// <summary>
/// Enumerate all of the items in the hash table. The items
/// are enumerated in a haphazard, unpredictable order.
/// </summary>
/// <returns>An IEnumerator<T> that enumerates the items
/// in the hash table.</returns>
public IEnumerator<T> GetEnumerator()
{
if (count > 0) {
int startStamp = changeStamp;
foreach (Slot slot in table) {
if (!slot.Empty) {
yield return slot.item;
CheckEnumerationStamp(startStamp);
}
}
}
}
/// <summary>
/// Enumerate all of the items in the hash table. The items
/// are enumerated in a haphazard, unpredictable order.
/// </summary>
/// <returns>An IEnumerator that enumerates the items
/// in the hash table.</returns>
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
/// <summary>
/// Creates a clone of this hash table.
/// </summary>
/// <param name="cloneItem">If non-null, this function is applied to each item when cloning. It must be the
/// case that this function does not modify the hash code or equality function.</param>
/// <returns>A shallow clone that contains the same items.</returns>
public Hash<T> Clone(Converter<T,T> cloneItem)
{
Hash<T> clone = new Hash<T>(equalityComparer);
clone.count = this.count;
clone.usedSlots = this.usedSlots;
clone.totalSlots = this.totalSlots;
clone.loadFactor = this.loadFactor;
clone.thresholdGrow = this.thresholdGrow;
clone.thresholdShrink = this.thresholdShrink;
clone.hashMask = this.hashMask;
clone.secondaryShift = this.secondaryShift;
if (table != null) {
clone.table = (Slot[])table.Clone();
if (cloneItem != null) {
for (int i = 0; i < table.Length; ++i) {
if (!table[i].Empty)
table[i].item = cloneItem(table[i].item);
}
}
}
return clone;
}
#region Serialization
/// <summary>
/// Serialize the hash table. Called from the serialization infrastructure.
/// </summary>
void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
{
if (info == null)
throw new ArgumentNullException("info");
info.AddValue("equalityComparer", equalityComparer, typeof(IEqualityComparer<T>));
info.AddValue("loadFactor", loadFactor, typeof(float));
T[] items = new T[count];
int i = 0;
foreach (Slot slot in table)
if (! slot.Empty)
items[i++] = slot.item;
info.AddValue("items", items, typeof(T[]));
}
/// <summary>
/// Called on deserialization. We cannot deserialize now, because hash codes
/// might not be correct now. We do real deserialization in the OnDeserialization call.
/// </summary>
protected Hash(SerializationInfo serInfo, StreamingContext context)
{
// Save away the serialization info for use later. We can't be sure of hash codes
// being stable until the entire object graph is deserialized, so we wait until then
// to deserialize.
serializationInfo = serInfo;
}
/// <summary>
/// Deserialize the hash table. Called from the serialization infrastructure when
/// the object graph has finished deserializing.
/// </summary>
void IDeserializationCallback.OnDeserialization(object sender)
{
if (serializationInfo == null)
return;
loadFactor = serializationInfo.GetSingle("loadFactor");
equalityComparer = (IEqualityComparer<T>) serializationInfo.GetValue("equalityComparer", typeof(IEqualityComparer<T>));
T[] items = (T[])serializationInfo.GetValue("items", typeof(T[]));
T dummy;
EnsureEnoughSlots(items.Length);
foreach (T item in items)
Insert(item, true, out dummy);
serializationInfo = null;
}
#endregion Serialization
#if DEBUG
/// <summary>
/// Print out basic stats about the hash table.
/// </summary>
internal void PrintStats()
{
Console.WriteLine("count={0} usedSlots={1} totalSlots={2}", count, usedSlots, totalSlots);
Console.WriteLine("loadFactor={0} thresholdGrow={1} thresholdShrink={2}", loadFactor, thresholdGrow, thresholdShrink);
Console.WriteLine("hashMask={0:X} secondaryShift={1}", hashMask, secondaryShift);
Console.WriteLine();
}
/// <summary>
/// Print out the state of the hash table and each of the slots. Each slot looks like:
/// Slot 4: C 4513e41e hello
/// where the "C" indicates the collision bit is on
/// the next hex number is the hash value
/// followed by ToString() on the item.
/// </summary>
internal void Print()
{
PrintStats();
for (int i = 0; i < totalSlots; ++i)
Console.WriteLine("Slot {0,4:X}: {1} {2,8:X} {3}", i, table[i].Collision ? "C" : " ",
table[i].HashValue, table[i].Empty ? "<empty>" : table[i].item.ToString());
Console.WriteLine();
}
/// <summary>
/// Check that everything appears to be OK in the hash table.
/// </summary>
internal void Validate()
{
Debug.Assert(count <= usedSlots);
Debug.Assert(count <= totalSlots);
Debug.Assert(usedSlots <= totalSlots);
Debug.Assert(usedSlots <= thresholdGrow);
Debug.Assert((int)(totalSlots * loadFactor) == thresholdGrow);
if (thresholdShrink > 1)
Debug.Assert(thresholdGrow / 3 == thresholdShrink);
else
Debug.Assert(thresholdGrow / 3 <= MINSIZE);
if (totalSlots > 0) {
Debug.Assert((totalSlots & (totalSlots - 1)) == 0); // totalSlots is a power of two.
Debug.Assert(totalSlots - 1 == hashMask);
Debug.Assert(GetSecondaryShift(totalSlots) == secondaryShift);
Debug.Assert(totalSlots == table.Length);
}
// Traverse the table. Make sure that count and usedSlots are right, and that
// each slot looks reasonable.
int expectedCount = 0, expectedUsed = 0, initialBucket, skip;
if (table != null) {
for (int i = 0; i < totalSlots; ++i) {
Slot slot = table[i];
if (slot.Empty) {
// Empty slot
if (slot.Collision)
++expectedUsed;
Debug.Assert(object.Equals(default(T), slot.item));
}
else {
// not empty.
++expectedCount;
++expectedUsed;
Debug.Assert(slot.HashValue != 0);
Debug.Assert(GetHashValues(slot.item, out initialBucket, out skip) == slot.HashValue);
if (initialBucket != i)
Debug.Assert(table[initialBucket].Collision);
}
}
}
Debug.Assert(expectedCount == count);
Debug.Assert(expectedUsed == usedSlots);
}
#endif //DEBUG
}
}
|