标签:run cache mit nal void ignore asc case dict
C# HashSet源码分享 自定义HashSet
官网源码地址:
https://referencesource.microsoft.com/#System.Core/System/Collections/Generic/HashSet.cs
关键点
-
实现原理和Dictionary差不多
- Dictionary 有Key Value
- HashSet只有Value
-
实际容器为Slot[] m_slots;
-
HashSet操作元素的时间复杂度接近O(1)
- 定义int[] m_buckets 数组来保存元素在实际容器Slot[] m_slots 位置
- 即 Value的保存在 m_slots[m_buckets[value.GetHashCode()%m_buckets.Length]].value
-
容器长度为质数
-
当位置冲突时使用Slot.next保存数据
-
数据已满时添加数据扩容会自动扩充当前容量的2倍
- 新建一个2倍大小的容器
- 数据拷贝过去 重新计算位置
优化点
- 已知容器大小的情况 直接初始化对应大小
- 自定义元素可以实现IEqualityComparer可以更高效判断相等和获取HashCode
自定义HashSet
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.Serialization;
public class MyHashSet : IEnumerable
{
internal struct Slot
{
///
/// 如果未使用则为-1
///
internal int hashCode;
internal T value;
///
/// hashCode冲突是指向下一索引 未使用为-1
///
internal int next;
}
internal struct ElementCount
{
internal int uniqueCount;
internal int unfoundCount;
}
private const int Lower31BitMask = 0x7FFFFFFF;
///
/// 堆栈阈值
///
private const int StackAllocThreshold = 100;
///
/// Hash桶
///
private int[] m_buckets;
///
/// 数据
///
private Slot[] m_slots;
///
/// 实际使用大小
///
private int m_count;
///
/// 上次使用索引位置
///
private int m_lastIndex;
///
/// 空闲索引
///
private int m_freeList;
///
/// 相等对比函数
///
private IEqualityComparer m_comparer;
///
/// 版本
///
private int m_version;
///
/// 初始化 使用默认相等对比函数
///
public MyHashSet() : this(EqualityComparer.Default) { }
///
/// 初始化
///
/// 相等对比函数
public MyHashSet(IEqualityComparer comparer)
{
if (comparer == null)
{
comparer = EqualityComparer.Default;
}
this.m_comparer = comparer;
m_lastIndex = 0;
m_count = 0;
m_freeList = -1;
m_version = 0;
}
///
/// 初始化
///
/// 指定容器
public MyHashSet(IEnumerable collection) : this(collection, EqualityComparer.Default) { }
///
/// 初始化
///
/// 指定容器
/// 指定相等对比函数
public MyHashSet(IEnumerable collection, IEqualityComparer comparer) : this(comparer)
{
if (collection == null)
{
throw new ArgumentNullException("collection");
}
int suggestedCapacity = 0;
ICollection coll = collection as ICollection;
if (coll != null)
{
suggestedCapacity = coll.Count;
}
Initialize(suggestedCapacity);
this.UnionWith(collection);//合并数据
if ((m_count == 0 && m_slots.Length > 3) || (m_count > 0 && m_slots.Length / m_count > 3))
{
TrimExcess();//设置容量为实际元素数
}
}
///
/// 数量
///
public int Count
{
get { return m_count; }
}
///
/// 添加
///
///
///
public bool Add(T item)
{
return AddIfNotPresent(item);
}
///
/// 清理
///
public void Clear()
{
if (m_lastIndex > 0)
{
Debug.Assert(m_buckets != null, "m_buckets was null but m_lastIndex > 0");
Array.Clear(m_slots, 0, m_lastIndex);
Array.Clear(m_buckets, 0, m_buckets.Length);
m_lastIndex = 0;
m_count = 0;
m_freeList = -1;
}
m_version++;
}
///
/// 是否包含
///
///
///
public bool Contains(T item)
{
if (m_buckets != null)
{
int hashCode = InternalGetHashCode(item);
// see note at "HashSet" level describing why "- 1" appears in for loop
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next)
{
if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, item))
{
return true;
}
}
}
// either m_buckets is null or wasn‘t found
return false;
}
///
/// 拷贝到数组
///
/// 拷贝到数组容器
public void CopyTo(T[] array)
{
CopyTo(array, 0, m_count);
}
///
/// 拷贝到数组
///
/// 拷贝到数组容器
/// 数组容器开始项
public void CopyTo(T[] array, int arrayIndex)
{
CopyTo(array, arrayIndex, m_count);
}
///
/// 拷贝到数组
///
/// 拷贝到数组容器
/// 数组容器开始项
/// 拷贝数量
public void CopyTo(T[] array, int arrayIndex, int count)
{
if (array == null)
{
throw new ArgumentNullException("array");
}
if (arrayIndex array.Length || count > array.Length - arrayIndex)
{
throw new ArgumentException("arrayIndex > array.Length || count > array.Length - arrayIndex");
}
int numCopied = 0;
for (int i = 0; i = 0)
{
array[arrayIndex + numCopied] = m_slots[i].value;
numCopied++;
}
}
}
///
/// 删除和other相等的项
///
///
public void ExceptWith(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
// this is already the enpty set; return
if (m_count == 0)
{
return;
}
// special case if other is this; a set minus itself is the empty set
if (other == this)
{
Clear();
return;
}
// remove every element in other from this
foreach (T element in other)
{
Remove(element);
}
}
///
/// 获取迭代枚举数
///
///
public Enumerator GetEnumerator()
{
return new Enumerator(this);
}
IEnumerator IEnumerable.GetEnumerator()
{
return new Enumerator(this);
}
IEnumerator IEnumerable.GetEnumerator()
{
return new Enumerator(this);
}
public IEqualityComparer Comparer
{
get
{
return m_comparer;
}
}
public virtual void GetObjectData(SerializationInfo info, StreamingContext context)
{
}
///
/// 求和other交集
///
///
public void IntersectWith(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
if (m_count == 0)
{
return;
}
ICollection otherAsCollection = other as ICollection;
if (otherAsCollection != null)
{
if (otherAsCollection.Count == 0)
{
Clear();
return;
}
MyHashSet otherAsSet = other as MyHashSet;
// faster if other is a hashset using same equality comparer; so check
// that other is a hashset using the same equality comparer.
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
{
IntersectWithHashSetWithSameEC(otherAsSet);
return;
}
}
IntersectWithEnumerable(other);
}
///
/// 是否为other的真子集
///
///
///
public bool IsProperSubsetOf(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
ICollection otherAsCollection = other as ICollection;
if (otherAsCollection != null)
{
// the empty set is a proper subset of anything but the empty set
if (m_count == 0)
{
return otherAsCollection.Count > 0;
}
MyHashSet otherAsSet = other as MyHashSet;
// faster if other is a hashset (and we‘re using same equality comparer)
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
{
if (m_count >= otherAsSet.Count)
{
return false;
}
// this has strictly less than number of items in other, so the following
// check suffices for proper subset.
return IsSubsetOfHashSetWithSameEC(otherAsSet);
}
}
ElementCount result = CheckUniqueAndUnfoundElements(other, false);
return (result.uniqueCount == m_count && result.unfoundCount > 0);
}
///
/// 是否为other的真超集
///
///
///
public bool IsProperSupersetOf(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
// the empty set isn‘t a proper superset of any set.
if (m_count == 0)
{
return false;
}
ICollection otherAsCollection = other as ICollection;
if (otherAsCollection != null)
{
// if other is the empty set then this is a superset
if (otherAsCollection.Count == 0)
{
// note that this has at least one element, based on above check
return true;
}
MyHashSet otherAsSet = other as MyHashSet;
// faster if other is a hashset with the same equality comparer
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
{
if (otherAsSet.Count >= m_count)
{
return false;
}
// now perform element check
return ContainsAllElements(otherAsSet);
}
}
// couldn‘t fall out in the above cases; do it the long way
ElementCount result = CheckUniqueAndUnfoundElements(other, true);
return (result.uniqueCount
/// 是否为other的子集
///
///
///
public bool IsSubsetOf(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
// The empty set is a subset of any set
if (m_count == 0)
{
return true;
}
MyHashSet otherAsSet = other as MyHashSet;
// faster if other has unique elements according to this equality comparer; so check
// that other is a hashset using the same equality comparer.
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
{
// if this has more elements then it can‘t be a subset
if (m_count > otherAsSet.Count)
{
return false;
}
// already checked that we‘re using same equality comparer. simply check that
// each element in this is contained in other.
return IsSubsetOfHashSetWithSameEC(otherAsSet);
}
else
{
ElementCount result = CheckUniqueAndUnfoundElements(other, false);
return (result.uniqueCount == m_count && result.unfoundCount >= 0);
}
}
///
/// 是否为other的超集
///
///
///
public bool IsSupersetOf(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
// try to fall out early based on counts
ICollection otherAsCollection = other as ICollection;
if (otherAsCollection != null)
{
// if other is the empty set then this is a superset
if (otherAsCollection.Count == 0)
{
return true;
}
MyHashSet otherAsSet = other as MyHashSet;
// try to compare based on counts alone if other is a hashset with
// same equality comparer
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
{
if (otherAsSet.Count > m_count)
{
return false;
}
}
}
return ContainsAllElements(other);
}
public virtual void OnDeserialization(Object sender)
{
}
///
/// 是否包含other中至少一个元素
///
///
///
public bool Overlaps(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
if (m_count == 0)
{
return false;
}
foreach (T element in other)
{
if (Contains(element))
{
return true;
}
}
return false;
}
///
/// 删除元素
///
///
///
public bool Remove(T item)
{
if (m_buckets != null)
{
int hashCode = InternalGetHashCode(item);
int bucket = hashCode % m_buckets.Length;
int last = -1;
for (int i = m_buckets[bucket] - 1; i >= 0; last = i, i = m_slots[i].next)
{
if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, item))
{
if (last
/// 删除匹配项 返回匹配数量
///
///
///
public int RemoveWhere(Predicate match)
{
if (match == null)
{
throw new ArgumentNullException("match");
}
int numRemoved = 0;
for (int i = 0; i = 0)
{
// cache value in case delegate removes it
T value = m_slots[i].value;
if (match(value))
{
// check again that remove actually removed it
if (Remove(value))
{
numRemoved++;
}
}
}
}
return numRemoved;
}
///
/// 判断other所有元素与自身相等
///
///
///
public bool SetEquals(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
MyHashSet otherAsSet = other as MyHashSet;
// faster if other is a hashset and we‘re using same equality comparer
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
{
// attempt to return early: since both contain unique elements, if they have
// different counts, then they can‘t be equal
if (m_count != otherAsSet.Count)
{
return false;
}
// already confirmed that the sets have the same number of distinct elements, so if
// one is a superset of the other then they must be equal
return ContainsAllElements(otherAsSet);
}
else
{
ICollection otherAsCollection = other as ICollection;
if (otherAsCollection != null)
{
// if this count is 0 but other contains at least one element, they can‘t be equal
if (m_count == 0 && otherAsCollection.Count > 0)
{
return false;
}
}
ElementCount result = CheckUniqueAndUnfoundElements(other, true);
return (result.uniqueCount == m_count && result.unfoundCount == 0);
}
}
///
/// 修改自身 删除存在自身和other的元素
///
///
public void SymmetricExceptWith(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
// if set is empty, then symmetric difference is other
if (m_count == 0)
{
UnionWith(other);
return;
}
// special case this; the symmetric difference of a set with itself is the empty set
if (other == this)
{
Clear();
return;
}
MyHashSet otherAsSet = other as MyHashSet;
// If other is a HashSet, it has unique elements according to its equality comparer,
// but if they‘re using different equality comparers, then assumption of uniqueness
// will fail. So first check if other is a hashset using the same equality comparer;
// symmetric except is a lot faster and avoids bit array allocations if we can assume
// uniqueness
if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
{
SymmetricExceptWithUniqueHashSet(otherAsSet);
}
else
{
SymmetricExceptWithEnumerable(other);
}
}
///
/// 除去多余的容器
///
public void TrimExcess()
{
Debug.Assert(m_count >= 0, "m_count is negative");
if (m_count == 0)//如果计数为零,清除引用
{
m_buckets = null;
m_slots = null;
m_version++;
}
else
{
Debug.Assert(m_buckets != null, "m_buckets was null but m_count > 0");
int newSize = HashHelpers.GetPrime(m_count);//获取最小适用素数
Slot[] newSlots = new Slot[newSize];
int[] newBuckets = new int[newSize];
int newIndex = 0;
for (int i = 0; i = 0)
{
newSlots[newIndex] = m_slots[i];//拷贝到新容器
int bucket = newSlots[newIndex].hashCode % newSize;//重现获取hash桶索引
newSlots[newIndex].next = newBuckets[bucket] - 1;//设置新容器的next
newBuckets[bucket] = newIndex + 1;//设置新hash桶索引
newIndex++;
}
}
Debug.Assert(newSlots.Length
/// 合并
///
///
public void UnionWith(IEnumerable other)
{
if (other == null)
{
throw new ArgumentNullException("other");
}
foreach (T item in other)
{
AddIfNotPresent(item);//添加项
}
}
///
/// 初始化容器
///
///
private void Initialize(int capacity)
{
Debug.Assert(m_buckets == null, "Initialize was called but m_buckets was non-null");
int size = HashHelpers.GetPrime(capacity);//去最小质数容器
m_buckets = new int[size];//初始化指定大小Hash桶
m_slots = new Slot[size];
}
///
/// 添加如果值不存在
///
///
///
private bool AddIfNotPresent(T value)
{
if (m_buckets == null)
{
Initialize(0);
}
int hashCode = InternalGetHashCode(value);//获取整数Hash code 例如:978656418
int bucket = hashCode % m_buckets.Length;//求与 获取存放位置 例如长度为8 求余为2
int collisionCount = 0;//冲突数量
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next)
{
//遍历第一次 获取数据容器m_slots[2]值
//判断当前位置已存有相同的值
//遍历第二次 取m_slots[i].next 是否大于-1
//判断当前位置是否冲突 大于等于0 即代表冲突
if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, value))
{
return false;//容器中已经有相等的值 添加失败
}
collisionCount++;//冲突增加
}
int index;
if (m_freeList >= 0)//有空闲位置
{
index = m_freeList;
m_freeList = m_slots[index].next;
}
else
{
if (m_lastIndex == m_slots.Length)
{
IncreaseCapacity();//调整大小
bucket = hashCode % m_buckets.Length;////重现获取hashCode在buckets中存放在位置
}
index = m_lastIndex;
m_lastIndex++;
}
//存数据
m_slots[index].hashCode = hashCode;
m_slots[index].value = value;
m_slots[index].next = m_buckets[bucket] - 1;
//存索引 存的时候+1 取得时候要-1
m_buckets[bucket] = index + 1;
m_count++;
m_version++;
if (collisionCount > 100 && (m_comparer == null || m_comparer == EqualityComparer.Default))
{
m_comparer = EqualityComparer.Default;//使用新的对比hash函数
SetCapacity(m_buckets.Length, true);
}
return true;
}
///
/// 判断是相等对比函数是否相等
///
///
///
///
private static bool AreEqualityComparersEqual(MyHashSet set1, MyHashSet set2)
{
return set1.Comparer.Equals(set2.Comparer);
}
///
/// 内部获取Hash Code
///
///
///
private int InternalGetHashCode(T item)
{
if (item == null)
{
return 0;
}
return m_comparer.GetHashCode(item) & Lower31BitMask;//忽略符号位
}
///
/// 夸大容量
///
private void IncreaseCapacity()
{
Debug.Assert(m_buckets != null, "IncreaseCapacity called on a set with no elements");
int newSize = HashHelpers.ExpandPrime(m_count);//扩大两倍
if (newSize
/// 设置容量大小
///
///
///
private void SetCapacity(int newSize, bool forceNewHashCodes)
{
if (!HashHelpers.IsPrime(newSize))
{
throw new ArgumentException("New size is not prime!");
}
if (m_buckets == null)
{
throw new ArgumentException("SetCapacity called on a set with no elements");
}
Slot[] newSlots = new Slot[newSize];
if (m_slots != null)
{
Array.Copy(m_slots, 0, newSlots, 0, m_lastIndex);
}
if (forceNewHashCodes)
{
for (int i = 0; i
/// 删除不相交
///
///
private void IntersectWithHashSetWithSameEC(MyHashSet other)
{
for (int i = 0; i = 0)
{
T item = m_slots[i].value;
if (!other.Contains(item))
{
Remove(item);
}
}
}
}
private unsafe void IntersectWithEnumerable(IEnumerable other)
{
Debug.Assert(m_buckets != null, "m_buckets shouldn‘t be null; callers should check first");
// keep track of current last index; don‘t want to move past the end of our bit array
// (could happen if another thread is modifying the collection)
int originalLastIndex = m_lastIndex;
int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex);
BitHelper bitHelper;
if (intArrayLength = 0)
{
bitHelper.MarkBit(index);
}
}
// if anything unmarked, remove it. Perf can be optimized here if BitHelper had a
// FindFirstUnmarked method.
for (int i = 0; i = 0 && !bitHelper.IsMarked(i))
{
Remove(m_slots[i].value);
}
}
}
///
/// 内部查找索引
///
///
///
private int InternalIndexOf(T item)
{
Debug.Assert(m_buckets != null, "m_buckets was null; callers should check first");
int hashCode = InternalGetHashCode(item);
for (int i = m_buckets[hashCode % m_buckets.Length] - 1; i >= 0; i = m_slots[i].next)
{
if ((m_slots[i].hashCode) == hashCode && m_comparer.Equals(m_slots[i].value, item))
{
return i;
}
}
// wasn‘t found
return -1;
}
private bool IsSubsetOfHashSetWithSameEC(MyHashSet other)
{
foreach (T item in this)
{
if (!other.Contains(item))
{
return false;
}
}
return true;
}
private bool ContainsAllElements(IEnumerable other)
{
foreach (T element in other)
{
if (!Contains(element))
{
return false;
}
}
return true;
}
private void SymmetricExceptWithUniqueHashSet(MyHashSet other)
{
foreach (T item in other)
{
if (!Remove(item))
{
AddIfNotPresent(item);
}
}
}
private unsafe void SymmetricExceptWithEnumerable(IEnumerable other)
{
int originalLastIndex = m_lastIndex;
int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex);
BitHelper itemsToRemove;
BitHelper itemsAddedFromOther;
if (intArrayLength = 0; i = m_slots[i].next)
{
if (m_slots[i].hashCode == hashCode && m_comparer.Equals(m_slots[i].value, value))
{
location = i;
return false; //already present
}
}
int index;
if (m_freeList >= 0)
{
index = m_freeList;
m_freeList = m_slots[index].next;
}
else
{
if (m_lastIndex == m_slots.Length)
{
IncreaseCapacity();
// this will change during resize
bucket = hashCode % m_buckets.Length;
}
index = m_lastIndex;
m_lastIndex++;
}
m_slots[index].hashCode = hashCode;
m_slots[index].value = value;
m_slots[index].next = m_buckets[bucket] - 1;
m_buckets[bucket] = index + 1;
m_count++;
m_version++;
location = index;
return true;
}
private unsafe ElementCount CheckUniqueAndUnfoundElements(IEnumerable other, bool returnIfUnfound)
{
ElementCount result;
// need special case in case this has no elements.
if (m_count == 0)
{
int numElementsInOther = 0;
foreach (T item in other)
{
numElementsInOther++;
// break right away, all we want to know is whether other has 0 or 1 elements
break;
}
result.uniqueCount = 0;
result.unfoundCount = numElementsInOther;
return result;
}
Debug.Assert((m_buckets != null) && (m_count > 0), "m_buckets was null but count greater than 0");
int originalLastIndex = m_lastIndex;
int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex);
BitHelper bitHelper;
if (intArrayLength = 0)
{
if (!bitHelper.IsMarked(index))
{
// item hasn‘t been seen yet
bitHelper.MarkBit(index);
uniqueFoundCount++;
}
}
else
{
unfoundCount++;
if (returnIfUnfound)
{
break;
}
}
}
result.uniqueCount = uniqueFoundCount;
result.unfoundCount = unfoundCount;
return result;
}
public struct Enumerator : IEnumerator, System.Collections.IEnumerator
{
private MyHashSet set;
private int index;
private int version;
private T current;
internal Enumerator(MyHashSet set)
{
this.set = set;
index = 0;
version = set.m_version;
current = default(T);
}
public void Dispose()
{
}
public bool MoveNext()
{
if (version != set.m_version)
{
throw new InvalidOperationException("version != set.m_version");
}
while (index = 0)
{
current = set.m_slots[index].value;
index++;
return true;
}
index++;
}
index = set.m_lastIndex + 1;
current = default(T);
return false;
}
public T Current
{
get
{
return current;
}
}
Object System.Collections.IEnumerator.Current
{
get
{
if (index == 0 || index == set.m_lastIndex + 1)
{
throw new InvalidOperationException("index == 0 || index == set.m_lastIndex + 1");
}
return Current;
}
}
void System.Collections.IEnumerator.Reset()
{
if (version != set.m_version)
{
throw new InvalidOperationException("version != set.m_version");
}
index = 0;
current = default(T);
}
}
}
using System;
public class HashHelpers
{
public static readonly int[] primes = {
3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};
///
/// 获取质数
///
/// 最小值
///
public static int GetPrime(int min)
{
if (min = min) return prime;
}
//outside of our predefined table.
//compute the hard way.
for (int i = (min | 1); i
/// 判断是否为质数(素数)
///
///
///
public static bool IsPrime(int candidate)
{
//与0000 0001位运算 可以把奇数偶数 刷选出来,因为偶数不是质数(2除外)
if ((candidate & 1) != 0)
{
int limit = (int)Math.Sqrt(candidate);//求根号 相当于乘法中的中位数
for (int divisor = 3; divisor 0x7FEFFFFD && 0x7FEFFFFD > oldSize)
{
throw new Exception("oldSize 异常");
}
return GetPrime(newSize);
}
}
C# HashSet源码分享 自定义HashSet
标签:run cache mit nal void ignore asc case dict
原文地址:https://www.cnblogs.com/zouqiang/p/13230047.html