Problem statement
In our projects we often need to check whether the elements of an array A are present in another array B.
Approach 1: Put the data of B into a bitmap, then probe the bitmap with the data of A.
Cons: If the data in B is sparse, such as [1,2,900,800], the bitmap is expensive, because a bit must be reserved even for every number that does not appear.
Approach 2: Store the data of B in a hash table, then probe the table with the data of A.
For the principle behind the Blizzard hash, see the Blizzard hash article I reposted earlier.
Taking unordered_set as an example, keeping every value in the set takes a lot of space. If a modified Blizzard hash algorithm is used instead, a bool field can be used to mark an integer as present.
Source code (reference source)
The chosen integer hash functions and the source code are as follows:
// header file
#ifndef UINT64HASH_H
#define UINT64HASH_H
#include <cstdint>
#include <string>
using namespace std; // kept so existing includers that rely on it still compile

#define MAXTABLELEN 1024 // default hash index table size

//////////////////////////////////////////////////////////////////////////
// One slot of the hash index table.
typedef struct _HASHTABLE
{
    uint64_t nHashA;  // first verification hash of the stored key
    uint64_t nHashB;  // second verification hash of the stored key
    bool bExists;     // true once the slot is occupied
} HASHTABLE, *PHASHTABLE;

// Open-addressing (linear probing) presence table for 64-bit integers.
// A key is located with one hash and verified with two further hashes, so the
// key itself is never stored in the table.
class Uint64Hash
{
public:
    // Allocates a table with nTableLength slots, all marked empty.
    Uint64Hash(const long nTableLength = MAXTABLELEN);
    ~Uint64Hash(void);

    // The object owns a raw array; copying it would free the array twice.
    Uint64Hash(const Uint64Hash &) = delete;
    Uint64Hash &operator=(const Uint64Hash &) = delete;

private:
    uint64_t cryptTable[0x500];
    uint64_t m_tablelength;       // hash index table length
    HASHTABLE *m_HashIndexTable;  // the slots, owned by this object

private:
    void InitCryptTable();               // preprocessing of the hash index table
    uint64_t HashUint64(uint64_t key);   // position hash
    uint64_t HashUint64A(uint64_t key);  // verification hash A
    uint64_t HashUint64B(uint64_t key);  // verification hash B

public:
    // Inserts val; returns the slot position, or (uint64_t)-1 if the table is full.
    uint64_t Hash(uint64_t val);
    // Detects whether val has been hashed into the table.
    bool Hashed(uint64_t val);
};
#endif
#include <iostream> #include "Uint64Hash.h"//#include <boost/timer.hpp> #include <string> #include &
Lt;stdlib.h> using namespace std;
Uint64hash::uint64hash (const long ntablelength/*= maxtablelen*/) {initcrypttable ();
M_tablelength = Ntablelength;
Initialize hash Table m_hashindextable = new Hashtable[ntablelength];
for (int i = 0; i < ntablelength; i++) {M_hashindextable[i].nhasha =-1;
M_HASHINDEXTABLE[I].NHASHB =-1;
M_hashindextable[i].bexists = false; }} uint64hash::~uint64hash (void) {//Clean memory if (NULL! = m_hashindextable) {Delete []m
_hashindextable;
m_hashindextable = NULL;
m_tablelength = 0; }}/************************************************************************//* Function name: initcrypttable * Function: Preprocessing of hash Index Table *
return value: No ************************************************************************/void uint64hash::initcrypttable () {
uint64_t seed = 0x00100001, index1 = 0, Index2 = 0, I; for (index1 = 0; index1 < 0x100; index1++) {for (Index2 = index1, i = 0; i < 5; i++, index2 + = 0x
(+) {uint64_t temp1, temp2;
Seed = (Seed * + 3)% 0x2aaaab;
Temp1 = (Seed & 0xFFFF) << 0x10;
Seed = (Seed * + 3)% 0x2aaaab;
Temp2 = (seed & 0xFFFF);
CRYPTTABLE[INDEX2] = (Temp1 | temp2);
}}}/************************************************************************//* Function name: HashUint64 * Function: hash value is obtained * Return value: Return hash value ************************************************************************/uint64_t Uint64Hash::
HashUint64 (uint64_t key) {key = (~key) + (key <<);//key = (key <<)-key-1;
Key = key ^ (key >> 24); Key = (key + (key << 3)) + (key << 8);
Key * 265 key = key ^ (key >> 14); Key = (kEY + (Key << 2) + (key << 4);
Key * key = key ^ (key >> 28);
Key = key + (key << 31);
Return key; } uint64_t uint64hash::hashuint64a (uint64_t key) {key = (~key) + (key <<);//key = (Key << 18)
-Key-1;
Key = key ^ (key >> 31); Key = key * 21;
Key = (key + (key << 2)) + (key << 4);
Key = key ^ (key >> 11);
Key = key + (key << 6);
Key = key ^ (key >> 22);
Return key; } uint64_t uint64hash::hashuint64b (uint64_t key) {key = ~key + (key <<);//key = (Key << 15)-
Key-1;
Key = key ^ (key >> 12);
Key = key + (key << 2);
Key = key ^ (key >> 4); Key = key * 2057;
Key = (key + (key << 3)) + (key << 11);
Key = key ^ (key >> 16);
Return key; }/************************************************************************//* Function name: Hashed * Function: Detects if a string has been hashed * Return value: if present, 1; otherwise, return 0 *******/bool Uint64hash::hashed (uint64_t key) {//Not
The same string three-time hash will also collide with an infinite probability close to the impossible uint64_t Nhash = HashUint64 (key);
uint64_t Nhasha = hashuint64a (key);
uint64_t NHASHB = hashuint64b (key);
uint64_t Nhashstart = nhash% m_tablelength, nhashpos = Nhashstart; while (m_hashindextable[nhashpos].bexists) {if (M_hashindextable[nhashpos].nhasha = = Nhasha &&
M_HASHINDEXTABLE[NHASHPOS].NHASHB = = NHASHB) return 1;
else Nhashpos = (nhashpos + 1)% M_tablelength;
if (Nhashpos = = Nhashstart) break; } return 0; Not found}/************************************************************************//* Function Name: hash * Function: Hash A String * return value: Success, return position, failure, return-1 ************************************************************************/uint64_t Uint64Hash:: Hash (uint64_t key) {uint64_t Nhash = HashUint64(key);
uint64_t Nhasha = hashuint64a (key);
uint64_t NHASHB = hashuint64b (key);
uint64_t Nhashstart = nhash% m_tablelength, nhashpos = Nhashstart;
while (m_hashindextable[nhashpos].bexists) {Nhashpos = (nhashpos + 1)% M_tablelength;
if (Nhashpos = = Nhashstart)//A Reincarnation {//hash table has no vacant position, can not complete the hash return-1;
}} m_hashindextable[nhashpos].bexists = true;
M_hashindextable[nhashpos].nhasha = Nhasha;
M_HASHINDEXTABLE[NHASHPOS].NHASHB = NHASHB;
return nhashpos;
} int main () {//Boost::timer t;
const int SIZE = 5000000;
Uint64hash s (size*2);
for (int i = 0; i < SIZE; i++) {S.hash (i);
} for (int i = 0; i < SIZE; i++) {if (!s.hashed (i)) {cout<< "ERROR" <<endl;
Break
}} cout<<t.elapsed () <<endl; for (int i = SIZE; i < 2*size; i++){if (s.hashed (i)) {cout<< "ERROR" << i<<endl;
}}//cout<<t.elapsed () <<endl; }
The compile command is:
g++ -O3 -std=c++11
If you need to use the Boost timer to measure the elapsed time, the compile command is:
g++ -O3 -std=c++11 *.cpp -lboost_system
Comparison with the STL unordered_set
The code for unordered_set is:
#include <iostream>
#include <unordered_set>
#include <boost/timer.hpp>
using namespace std;

// Baseline benchmark: inserts 0..SIZE-1 into an unordered_set, checks that
// every value can be found again, and prints the elapsed time.
int main() {
    boost::timer t;
    const int SIZE = 5000000;
    unordered_set<int> s(SIZE);  // SIZE is the initial bucket-count hint
    for (int i = 0; i < SIZE; i++) {
        s.insert(i);
    }
    for (int i = 0; i < SIZE; i++) {
        if (s.end() == s.find(i)) {
            cout << "ERROR" << endl;
            break;
        }
    }
    cout << t.elapsed() << endl;
}
Timing: the Blizzard hash is noticeably slower. With the -O3 optimization option and 5,000,000 elements (500W, i.e. 500 × 10,000), insertion plus lookup takes 1.11 s with the Blizzard hash versus 0.46 s with unordered_set.