Given 4 billion distinct, unsorted unsigned integers, how can we quickly determine whether a given unsigned integer is among those 4 billion numbers? How can we solve this problem?
"Bitmap Method":
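Bitmap (BITMAP)
A bitmap uses each individual bit of the elements of an array to indicate whether a number exists: 1 means present, 0 means absent.
This is equivalent to dividing the array into chunks of 32 bits each, so that the number x is represented by bit (x % 32) of chunk (x / 32).
Originally 32 bits stored a single number; now a single bit records one number. Storing 4 billion 32-bit integers directly takes about 16 GB, so the bitmap needs only 16 GB / 32 = 0.5 GB = 512 MB.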
#ifndef __bitmap_h__
#define __bitmap_h__
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>
using namespace std;
/// Growable bitmap recording the presence of unsigned integers, one bit per
/// value: the value x lives in bit (x % 32) of word (x / 32).
///
/// Storage is a vector of exactly-32-bit words so that the /32 and %32
/// arithmetic matches the word width and no bits of a word are wasted.
class BitMap
{
private:
    typedef std::uint32_t Word;

public:
    /// Creates a bitmap with every bit cleared that can hold at least the
    /// values 0..size.
    ///
    /// One word more than size/32 is allocated so that a partial trailing word
    /// still fits (e.g. size = 36 gives 36/32 = 1, but two words are needed).
    BitMap (size_t size = 0)
        : _a ((size >> 5) + 1, 0)
        , _size (0)
    {
    }

    /// Marks the value x as present.
    ///
    /// If x lies beyond the current storage, the storage is grown (new bits
    /// cleared) instead of writing out of bounds.
    void Set (size_t x)
    {
        const size_t index = WordIndex (x);
        const Word mask = BitMask (x);
        if (index >= _a.size ())
        {
            _a.resize (index + 1, 0);
        }
        // Only count the bit when it actually flips from 0 to 1.
        if ((_a[index] & mask) == 0)
        {
            _a[index] |= mask;
            ++_size;
        }
    }

    /// Marks the value x as absent. Values beyond the current storage are
    /// already absent, so nothing happens for them.
    void Reset (size_t x)
    {
        const size_t index = WordIndex (x);
        if (index >= _a.size ())
        {
            return;
        }
        const Word mask = BitMask (x);
        // Only count the bit when it actually flips from 1 to 0.
        if ((_a[index] & mask) != 0)
        {
            _a[index] &= static_cast<Word> (~mask);
            --_size;
        }
    }

    /// Returns true when the value x is marked present, false otherwise
    /// (including when x lies beyond the current storage).
    bool Test (size_t x) const
    {
        const size_t index = WordIndex (x);
        if (index >= _a.size ())
        {
            return false;
        }
        return (_a[index] & BitMask (x)) != 0;
    }

    /// Resizes the underlying storage to `size` 32-bit words, i.e. room for
    /// the values 0 .. 32*size - 1.
    ///
    /// NOTE: unlike the constructor, the argument here is a number of words,
    /// not a number of values; this is kept as-is for existing callers.
    /// New words are cleared; bits in dropped words stop counting as set.
    void Resize (size_t size)
    {
        for (size_t i = size; i < _a.size (); ++i)
        {
            _size -= CountBits (_a[i]);
        }
        _a.resize (size, 0);
    }

private:
    /// Index of the word that holds the bit for x (x / 32).
    static size_t WordIndex (size_t x)
    {
        return x >> 5;
    }

    /// Single-bit mask for x inside its word (bit x % 32). The shift is done
    /// on an unsigned word so that bit 31 is well defined.
    static Word BitMask (size_t x)
    {
        return static_cast<Word> (static_cast<Word> (1) << (x & 31u));
    }

    /// Number of 1 bits in w (clears the lowest set bit until none remain).
    static size_t CountBits (Word w)
    {
        size_t count = 0;
        for (; w != 0; w &= static_cast<Word> (w - 1))
        {
            ++count;
        }
        return count;
    }

    std::vector<Word> _a;   // bit storage, 32 values per element
    size_t _size;           // number of bits currently set to 1
};
#endif//__bitmap_h__
"Fabric filter" (Imitation function implementation, select 5 bitmaps)
#define _CRT_SECURE_NO_WARNINGS 1
#ifndef __common__
#define __COMMON__
/// Returns the smallest prime in a fixed 28-entry table that is strictly
/// greater than `_size`. When no table entry is greater, the largest entry
/// (4294967291) is returned instead.
///
/// Declared `inline` because this definition sits inside an include-guarded
/// block: without it, including the file from more than one translation unit
/// produces multiple definitions of the function at link time.
inline size_t _getnewsize (size_t _size)
{
    static const int _primesize = 28;
    // Ascending primes, each roughly double the previous one.
    static const unsigned long _primelist[_primesize] =
    {
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };
    // The table is sorted, so the first entry above _size is the smallest one.
    for (int i = 0; i < _primesize; ++i)
    {
        if (_primelist[i] > _size)
        {
            return _primelist[i];
        }
    }
    // _size is at least as large as every entry: clamp to the last prime.
    return _primelist[_primesize - 1];
}
/// Functor computing the BKDR hash of a key.
///
/// T must provide `c_str()` returning a NUL-terminated `const char*`
/// (e.g. std::string).
template<class T>
struct __HASHFUNC1
{
    /// BKDR hash of the NUL-terminated string `str`; the empty string hashes
    /// to 0. Characters are converted straight from `char`, so where `char`
    /// is signed, bytes >= 0x80 are sign-extended.
    size_t bkdrhash (const char *str) const
    {
        size_t hash = 0;
        // The loop ends at the terminating NUL (ch == 0).
        while (size_t ch = static_cast<size_t> (*str++))
        {
            // The multiplier can also be 31, 131, 1313, 13131, 131313, ...
            hash = hash * 131 + ch;
        }
        return hash;
    }

    /// Hashes `key` through its C-string representation.
    size_t operator () (const T& key) const
    {
        return bkdrhash (key.c_str ());
    }
};
/// Functor computing the SDBM hash of a key.
///
/// T must provide `c_str()` returning a NUL-terminated `const char*`
/// (e.g. std::string).
template<class T>
struct __HASHFUNC2
{
    /// SDBM hash of the NUL-terminated string `str`; the empty string hashes
    /// to 0. Characters are converted straight from `char`, so where `char`
    /// is signed, bytes >= 0x80 are sign-extended.
    size_t sdbmhash (const char *str) const
    {
        size_t hash = 0;
        // The loop ends at the terminating NUL (ch == 0).
        while (size_t ch = static_cast<size_t> (*str++))
        {
            // Equivalent shift form (64 + 65536 - 1 == 65599):
            //   hash = ch + (hash << 6) + (hash << 16) - hash;
            hash = 65599 * hash + ch;
        }
        return hash;
    }

    /// Hashes `key` through its C-string representation.
    size_t operator () (const T& key) const
    {
        return sdbmhash (key.c_str ());
    }
};
/// Functor computing the RS hash of a key.
///
/// T must provide `c_str()` returning a NUL-terminated `const char*`
/// (e.g. std::string).
template<class T>
struct __HASHFUNC3
{
    /// RS hash of the NUL-terminated string `str`; the empty string hashes
    /// to 0. Characters are converted straight from `char`, so where `char`
    /// is signed, bytes >= 0x80 are sign-extended.
    size_t rshash (const char *str) const
    {
        size_t hash = 0;
        size_t magic = 63689;
        // The loop ends at the terminating NUL (ch == 0).
        while (size_t ch = static_cast<size_t> (*str++))
        {
            hash = hash * magic + ch;
            // The multiplier itself changes for every character.
            magic *= 378551;
        }
        return hash;
    }

    /// Hashes `key` through its C-string representation.
    size_t operator () (const T& key) const
    {
        return rshash (key.c_str ());
    }
};
/// Functor computing the JS hash of a key.
///
/// T must provide `c_str()` returning a NUL-terminated `const char*`
/// (e.g. std::string).
template<class T>
struct __HASHFUNC4
{
    /// JS hash of the NUL-terminated string `str`. Characters are converted
    /// straight from `char`, so where `char` is signed, bytes >= 0x80 are
    /// sign-extended.
    size_t jshash (const char *str) const
    {
        // Added so that the empty string hashes to 0 rather than to the seed.
        if (!*str)
        {
            return 0;
        }
        size_t hash = 1315423911;
        // The loop ends at the terminating NUL (ch == 0).
        while (size_t ch = static_cast<size_t> (*str++))
        {
            hash ^= ((hash << 5) + ch + (hash >> 2));
        }
        return hash;
    }

    /// Hashes `key` through its C-string representation.
    size_t operator () (const T& key) const
    {
        return jshash (key.c_str ());
    }
};
/// Functor computing the DEK hash of a key.
///
/// T must provide `c_str()` returning a NUL-terminated `const char*`
/// (e.g. std::string).
template<class T>
struct __HASHFUNC5
{
    /// DEK hash of the NUL-terminated string `str`. Characters are converted
    /// straight from `char`, so where `char` is signed, bytes >= 0x80 are
    /// sign-extended.
    size_t dekhash (const char* str) const
    {
        // Added so that the empty string hashes to 0 rather than to the seed.
        if (!*str)
        {
            return 0;
        }
        size_t hash = 1315423911;
        // The loop ends at the terminating NUL (ch == 0).
        while (size_t ch = static_cast<size_t> (*str++))
        {
            // NOTE(review): the right-shift count was missing in the source;
            // 27 is the value used by the standard DEK hash - confirm.
            hash = ((hash << 5) ^ (hash >> 27)) ^ ch;
        }
        return hash;
    }

    /// Hashes `key` through its C-string representation.
    size_t operator () (const T& key) const
    {
        return dekhash (key.c_str ());
    }
};
#endif//__common__
Bitmap and Bloom Filters