Bitmap and Bloom Filters

Source: Internet
Author: User


Given 4 billion unsigned integers that contain no duplicates and are not sorted, how can we quickly determine whether a given unsigned integer is among these 4 billion numbers? How can we solve this problem?


"Bitmap Method":

Bitmap (BITMAP)

A bitmap uses each bit of each element in an array to indicate whether a number exists: 1 means present, 0 means absent.

This is equivalent to dividing the array into chunks of space, each of which is 32 bits wide.

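Originally each number occupied 32 bits; now each number occupies a single bit. Storing 4 billion 32-bit integers directly takes about 16 GB, so the bitmap needs 16 GB / 32 = 0.5 GB = 512 MB.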


#ifndef __bitmap_h__
#define __bitmap_h__
#include <cstddef>
#include <iostream>
#include <vector>

using namespace std;

// BitMap records the presence of unsigned integers using one bit per value.
//
// Storage is a vector of words of which only the low 32 bits are used, so
// value x lives in word (x / 32) at bit (x % 32). Note that the element type
// is size_t; where size_t is wider than 32 bits the upper bits of each word
// stay unused.
class BitMap
{
public:
    // Creates a bitmap able to hold at least `size` values.
    BitMap (size_t size = 0)
        : _size (0)
    {
        // Allocate one extra word so a size that is not a multiple of 32
        // still fits, e.g. size = 36 -> 36 / 32 = 1, but two words are needed.
        _a.resize ((size >> 5) + 1);
    }

    // Marks x as present. The storage grows automatically when x lies beyond
    // the current capacity.
    void Set (size_t x)
    {
        const size_t index = x >> 5;   // same as x / 32
        const size_t num = x % 32;
        if (index >= _a.size ())
        {
            _a.resize (index + 1);
        }

        // Build the mask in size_t: a plain `1 << 31` would overflow int.
        const size_t mask = static_cast<size_t> (1) << num;

        // Only touch the word and the counter when the bit is not set yet.
        if (!(_a[index] & mask))
        {
            _a[index] |= mask;
            ++_size;
        }
    }

    // Marks x as absent. Values beyond the current capacity are ignored,
    // because they cannot be present.
    void Reset (size_t x)
    {
        const size_t index = x >> 5;   // same as x / 32
        const size_t num = x % 32;
        if (index >= _a.size ())
        {
            return;
        }

        const size_t mask = static_cast<size_t> (1) << num;

        // Only clear the bit and decrement the counter when the bit is set.
        if (_a[index] & mask)
        {
            _a[index] &= ~mask;
            --_size;
        }
    }

    // Returns true when x is present. Values beyond the current capacity are
    // reported as absent.
    bool Test (size_t x) const
    {
        const size_t index = x >> 5;   // same as x / 32
        const size_t num = x % 32;
        if (index >= _a.size ())
        {
            return false;
        }
        return (_a[index] & (static_cast<size_t> (1) << num)) != 0;
    }

    // Resizes the underlying storage to `size` words (NOT bits, unlike the
    // constructor argument). Shrinking discards the bits held in the removed
    // words, so the set-bit counter is recomputed in that case.
    void Resize (size_t size)
    {
        const bool shrinking = size < _a.size ();
        _a.resize (size);
        if (shrinking)
        {
            _size = 0;
            for (size_t i = 0; i < _a.size (); ++i)
            {
                // Clear the lowest set bit on each step to count set bits.
                for (size_t word = _a[i]; word != 0; word &= word - 1)
                {
                    ++_size;
                }
            }
        }
    }

private:
    std::vector<size_t> _a;   // bit storage, 32 used bits per element
    size_t _size;             // number of bits currently set
};

#endif//__bitmap_h__


"Bloom filter" (functor-based implementation, using 5 hash functions to select 5 bitmap positions)


#define _CRT_SECURE_NO_WARNINGS 1
#ifndef __common__
#define __COMMON__

// Returns the smallest prime in the built-in table that is strictly greater
// than _size. When _size is at or beyond the largest table entry, the largest
// entry (4294967291) is returned instead.
//
// Declared inline because it is defined in a header: without it, including
// the header from several translation units produces duplicate definitions.
inline size_t _getnewsize (size_t _size)
{
    static const int _primesize = 28;
    static const unsigned long _primelist[_primesize] =
    {
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };

    // The table is sorted ascending, so the first larger entry is the answer.
    for (int i = 0; i < _primesize; ++i)
    {
        if (_primelist[i] > _size)
        {
            return _primelist[i];
        }
    }

    // Nothing larger is available: saturate at the last entry.
    return _primelist[_primesize - 1];
}


// Functor computing the BKDR hash of a string-like key.
// The key type t must provide c_str() returning a NUL-terminated string.
template<class t>
struct __HASHFUNC1
{
    // Returns the BKDR hash of the NUL-terminated string str (0 for the empty
    // string). str must not be null.
    // Each char is converted directly to size_t, so on platforms where char
    // is signed, bytes >= 0x80 are sign-extended before being mixed in.
    size_t bkdrhash (const char *str) const
    {
        size_t hash = 0;
        while (size_t ch = static_cast<size_t> (*str++))
        {
            // The multiplier can also be 31, 131, 1313, 13131, 131313, ...
            hash = hash * 131 + ch;
        }
        return hash;
    }

    // Hashes key through its c_str() representation.
    size_t operator () (const t& key) const
    {
        return bkdrhash (key.c_str ());
    }
};

// Functor computing the SDBM hash of a string-like key.
// The key type t must provide c_str() returning a NUL-terminated string.
template<class t>
struct __HASHFUNC2
{
    // Returns the SDBM hash of the NUL-terminated string str (0 for the empty
    // string). str must not be null.
    // Each char is converted directly to size_t, so on platforms where char
    // is signed, bytes >= 0x80 are sign-extended before being mixed in.
    size_t sdbmhash (const char *str) const
    {
        size_t hash = 0;
        while (size_t ch = static_cast<size_t> (*str++))
        {
            hash = 65599 * hash + ch;
            // Equivalent shift form (65599 == 2^6 + 2^16 - 1):
            // hash = (size_t) ch + (hash << 6) + (hash << 16) - hash;
        }
        return hash;
    }

    // Hashes key through its c_str() representation.
    size_t operator () (const t& key) const
    {
        return sdbmhash (key.c_str ());
    }
};


// Functor computing the RS hash of a string-like key.
// The key type t must provide c_str() returning a NUL-terminated string.
template<class t>
struct __HASHFUNC3
{
    // Returns the RS hash of the NUL-terminated string str (0 for the empty
    // string). str must not be null.
    // Each char is converted directly to size_t, so on platforms where char
    // is signed, bytes >= 0x80 are sign-extended before being mixed in.
    size_t rshash (const char *str) const
    {
        size_t hash = 0;
        size_t magic = 63689;
        while (size_t ch = static_cast<size_t> (*str++))
        {
            hash = hash * magic + ch;
            // The multiplier itself changes after every character.
            magic *= 378551;
        }
        return hash;
    }

    // Hashes key through its c_str() representation.
    size_t operator () (const t& key) const
    {
        return rshash (key.c_str ());
    }
};


// Functor computing the JS hash of a string-like key.
// The key type t must provide c_str() returning a NUL-terminated string.
template<class t>
struct __HASHFUNC4
{
    // Returns the JS hash of the NUL-terminated string str.
    // str must not be null.
    // Each char is converted directly to size_t, so on platforms where char
    // is signed, bytes >= 0x80 are sign-extended before being mixed in.
    size_t jshash (const char *str) const
    {
        // Added by the author so that the empty string hashes to 0 instead of
        // to the seed value.
        if (!*str)
            return 0;

        size_t hash = 1315423911;
        while (size_t ch = static_cast<size_t> (*str++))
        {
            hash ^= ((hash << 5) + ch + (hash >> 2));
        }
        return hash;
    }

    // Hashes key through its c_str() representation.
    size_t operator () (const t& key) const
    {
        return jshash (key.c_str ());
    }
};


// Functor computing the DEK hash of a string-like key.
// The key type t must provide c_str() returning a NUL-terminated string.
template<class t>
struct __HASHFUNC5
{
    // Returns the DEK hash of the NUL-terminated string str.
    // str must not be null.
    // Each char is converted directly to size_t, so on platforms where char
    // is signed, bytes >= 0x80 are sign-extended before being mixed in.
    size_t dekhash (const char* str) const
    {
        // Added by the author so that the empty string hashes to 0 instead of
        // to the seed value.
        if (!*str)
            return 0;

        size_t hash = 1315423911;
        while (size_t ch = static_cast<size_t> (*str++))
        {
            hash = ((hash << 5) ^ (hash >> 27)) ^ ch;
        }
        return hash;
    }

    // Hashes key through its c_str() representation.
    size_t operator () (const t& key) const
    {
        return dekhash (key.c_str ());
    }
};

#endif//__common__


Bitmap and Bloom Filters

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of the page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.