[原创]大数据:布隆过滤器C#版简单实现。

2021-06-28 16:05

阅读:448

标签:bitarray   abi   ==   soscw   readline   string   start   hash   summary   

    public class BloomFilter
    {
        public BitArray _BloomArray;
        public Int64 BloomArryLength { get; }
        public Int64 DataArrayLeng { get; }
        public Int64 BitIndexCount { get; }

        /// 
        /// 初始化
        /// 
        /// 布隆数组的大小
        /// 数据的长度
        /// hash数
        public BloomFilter(int BloomArryLength,int DataArrayLeng,int bitIndexCount)
        {
            _BloomArray = new BitArray(BloomArryLength);
            this.BloomArryLength = BloomArryLength;
            this.DataArrayLeng = DataArrayLeng;
            this.BitIndexCount = bitIndexCount;
        }

        
        public void Add(string str)
        {
            var hashCode = GetHashCode(str);
            Random random = new Random(hashCode);
            for (int i = 0; i )
            {
                var c = random.Next((int)(this.BloomArryLength - 1));
                _BloomArray[c] = true;
            }
        }

        public bool isExist(string str)
        {
            var hashCode = GetHashCode(str);
            Random random = new Random(hashCode);
            for (int i = 0; i )
            {
                if(!_BloomArray[random.Next((int)(this.BloomArryLength - 1))])
                {
                    return false;
                }
            }
            return true;
        }

        public int GetHashCode(object value)
        {
            return value.GetHashCode();
        }

        public double getFalsePositiveProbability()
        {
            // (1 - e^(-k * n / m)) ^ k
            return Math.Pow((1 - Math.Exp(-BitIndexCount * (double)DataArrayLeng / BloomArryLength)),
                    BitIndexCount);
        }
    }

 

        static void Main(string[] args)
        {
            Bloom_Filter.BloomFilter bloom = new Bloom_Filter.BloomFilter(200000000, 50000000, 3);//五千万条数据

            for (int i = 0; i )//五千万条数据
            {
                bloom.Add(i.ToString());
            }
            do
            {
                var c = Console.ReadLine();
                if (c == "e")
                    break;
                Stopwatch sw = new Stopwatch();
                sw.Start();
                var temp=bloom.isExist(c);
                sw.Stop();
                Console.WriteLine($"查找:{c}\n结果:{temp}\n总耗时:{sw.ElapsedTicks}\n错误概率:{bloom.getFalsePositiveProbability()}");
            } while (true);
        }

结果:使用内存27MB,查找结果一般在100毫秒以内。

技术分享图片

 

[原创]大数据:布隆过滤器C#版简单实现。

标签:bitarray   abi   ==   soscw   readline   string   start   hash   summary   

原文地址:https://www.cnblogs.com/yueyue184/p/10037587.html


评论


亲,登录后才可以留言!