Primary objective: find a hash function that handles 64-bit integer keys efficiently — faster and more space-efficient than the other hash functions tested — while using only 32-bit integer arithmetic internally.
Tested: "rshash", "jshash", "pjwhash", "elfhash", "bkdrhash", "sdbmhash", "djbhash", "dekhash", "bphash", "fnvhash" and "aphash" were each used to build a chained hash table, and the bucket utilization and collision rate were measured. The document containing the result analysis has been lost (a lesson learned: the work was not written up in time).
Shortcomings: the time and space efficiency of the hash functions was not analyzed, nor was the randomness or normality of their inputs and outputs.
Self-designed hash function implementation:
// Row constants for the first ("self-designed") matrix hash: changetomatrix()
// expands 32 of these values into a 32x64 bit matrix.
// NOTE(review): the initializer below is corrupted — it contains non-numeric
// tokens ("Listen", "weight"), embedded spaces and stray 'l' characters — so
// the original 64-bit values cannot be read back from this listing. Regenerate
// them (the generator script later in this document writes matrix.txt) before use.
Unsigned long matrix [] = {
Listen, listen, 8048017138412720916,153 32027341811451737l, weight, 8116965580610093270,763 weight, 3639441384019100182,152 weight, 8509997149940110407,756 weight, weight, 4605006862157952628,501 6443212556328023, 9002359712050550171,663 weight, 2225254567237014321,146 weight, weight, 5688277523122870273,604 4889914109049598, 14531873893893142015l, 201754372033167844835l, 394252017736706278,215 8134086325640502, 1901858137268079707,124 39218790442925755l, 41528840860977635074l, 1248934315101581618 };
/*
 * Expand the low `len` bits of `input` into `array`, one bit per element
 * (array[i] receives bit i, least-significant bit first).
 *
 * input : value whose bits are extracted
 * array : output buffer with room for at least `len` ints
 * len   : number of bits/elements to write (the callers in this file pass 64)
 *
 * The listing overwrote `len` with 64, so a shorter buffer would have been
 * overrun; the caller's length is now honoured. Bit positions beyond the
 * width of unsigned long are written as 0 rather than shifting out of range
 * (undefined behaviour where unsigned long is 32 bits wide).
 */
void changetoarray(unsigned long input, int *array, int len)
{
    const int width = (int)(sizeof(unsigned long) * 8);
    for (int i = 0; i < len; i++) {
        array[i] = (i < width) ? (int)((input >> i) & 0x1UL) : 0;
    }
}
// 32x64 table of 0/1 ints filled by changetomatrix(): entry [I][J] is written
// from bit J of the I-th row constant.
// NOTE(review): this listing is not valid C++ as shown (capitalised keywords,
// ">" where a right shift is evidently intended, "=" where a comparison is
// evidently intended, unbalanced parentheses).
// NOTE(review): the 1-D constant array read in the condition and this 2-D
// array appear under the same name here; presumably they were distinct
// identifiers in the original source — confirm before restoring the code.
Int matrix [32] [64];
Void changetomatrix (){
For (INT I = 0; I <32; I ++)
For (Int J = 0; j <64; j ++ ){
// Test bit J of row constant I (shift-and-mask, per the visible "& 0x1").
If (Matrix [I]> J) & 0x1) = 1 ){
Matrix [I] [J] = 1;
} Else {
Matrix [I] [J] = 0;
}
}
}
Unsigned int matrixhash (unsigned long input ){
Int TMP [64];
Changetoarray (input, TMP, 64 );
Int res [32];
Int tmpres [32];
For (Int J = 0; j <64; j ++ ){
For (int K = 0; k <32; k ++ ){
Res [k] = res [k] ^ tmpres [k];
Tmpres [k] = 0;
}
For (INT I = 0; I <32; I ++ ){
If (TMP [J] = 0 ){
Tmpres [I] = 0;
} Else {
Tmpres [I] = matrix [I] [J];
}
}
}
}
Improved Version:
// Row constants for the improved matrix hash: matrixhash() XORs entry I into
// the result when bit I of the input is set, so one entry per input bit is
// expected.
// NOTE(review): the initializer below is corrupted — values are split by
// spaces and some are replaced by the word "records" — so the original
// constants cannot be read back from this listing. Regenerate them (the
// generator script later in this document writes lmatrix.txt) before use.
Unsigned long matrix [] = {
694164548,378 records, 3807463199,268 4797435, 2359943013,223 1240996, 2135863124,121 1164704, 2302089482,410 5647604, 3076642034,721 61852, 59560020,611 878035, 2814490697,391 5797072, 1219075273,397 8906545, 193953705,413 2630722, 2627050652,198 9142569, 2745032496,735 records, 3485798578,163 7027010, 2467907528,311 records, 1201254602,373 7777921, 796676896,234 9326933, 2449810837,391 0433321, 3550767862,164 4342526, 1438602584,217 9832898, 3868189175,181 4084994, 615833333,203 1832363, 3130167043,420 0349959, 1930419194,378 7258680, 3369981751,332 9843958, 3446569769,385 records, 163055231,100 records, 422846498,101 1763997, 533633029,255 7610330, 4221463260,950 17657, 88369066,322 7540105, 3815919250,223 4633741, 1819183943,556 442040
};
/*
 * Expand the low `len` bits of `input` into `array`, one bit per element
 * (array[i] receives bit i, least-significant bit first).
 *
 * input : value whose bits are extracted
 * array : output buffer with room for at least `len` ints
 * len   : number of bits/elements to write (the callers in this file pass 64)
 *
 * The listing overwrote `len` with 64, so a shorter buffer would have been
 * overrun; the caller's length is now honoured. Bit positions beyond the
 * width of unsigned long are written as 0 rather than shifting out of range
 * (undefined behaviour where unsigned long is 32 bits wide).
 */
void changetoarray(unsigned long input, int *array, int len)
{
    const int width = (int)(sizeof(unsigned long) * 8);
    for (int i = 0; i < len; i++) {
        array[i] = (i < width) ? (int)((input >> i) & 0x1UL) : 0;
    }
}
// 32x64 table of 0/1 ints filled by changetomatrix(): entry [I][J] is written
// from bit J of the I-th row constant.
// NOTE(review): the improved matrixhash() below indexes the 1-D constant
// array directly and does not read this table.
// NOTE(review): this listing is not valid C++ as shown (capitalised keywords,
// ">" where a right shift is evidently intended, "=" where a comparison is
// evidently intended, unbalanced parentheses), and the 1-D constant array and
// this 2-D array appear under the same name — presumably distinct identifiers
// in the original source; confirm before restoring the code.
Int matrix [32] [64];
Void changetomatrix (){
For (INT I = 0; I <32; I ++)
For (Int J = 0; j <64; j ++ ){
// Test bit J of row constant I (shift-and-mask, per the visible "& 0x1").
If (Matrix [I]> J) & 0x1) = 1 ){
Matrix [I] [J] = 1;
} Else {
Matrix [I] [J] = 0;
}
}
}
Unsigned int matrixhash (unsigned long input ){
Int TMP [64];
Changetoarray (input, TMP, 64); // you do not need to change the number of integers.
// For (INT I = 0; I <64; I ++)
// Cout <TMP [I];
Unsigned int res = 0;
For (INT I = 0; I <64; I ++ ){
If (TMP [I] = 1) {// although it can be parallel, the overhead of parallel thread switching may be far greater than that of the thread itself due to its simple operation
Res = res ^ matrix [I];// The random array cannot determine whether it is feasible or efficient, and the row is full-rank.
}
}
Return res;
}
/*************************************** **********************************
> File name: hash_table.cpp
> Author: wjy
> Mail: [email protected]
> Created time: Tue 7/8 09:54:52 2014
**************************************** ********************************/
#include <iostream>
#include <string>
#include <ctime>
#include <time.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
using namespace std;
/*
 * RS hash: multiplicative hash whose multiplier itself changes every step.
 *
 * str : pointer to the bytes to hash (need not be NUL-terminated)
 * len : number of bytes to consume
 * Returns the 32-bit hash; 0 for an empty input. All arithmetic wraps
 * modulo 2^32 (unsigned int).
 */
unsigned int rshash(char *str, unsigned int len)
{
    unsigned int b = 378551;
    unsigned int a = 63689;
    unsigned int hash = 0;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash = hash * a + (*str);
        a = a * b;              /* multiplier evolves with each byte */
    }
    return hash;
}
/* End of RS hash function */
/*
 * JS hash: shift-add-xor hash seeded with 1315423911.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the 32-bit hash; the seed itself for an empty input.
 */
unsigned int jshash(char *str, unsigned int len)
{
    unsigned int hash = 1315423911;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash ^= ((hash << 5) + (*str) + (hash >> 2));
    }
    return hash;
}
/* End of JS hash function */
/*
 * PJW hash: shifts each byte in from the low end and, whenever the top
 * one-eighth of the word becomes non-zero, folds those bits back into the
 * lower part and clears them.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the hash; the top one-eighth of the bits is always zero.
 */
unsigned int pjwhash(char *str, unsigned int len)
{
    const unsigned int bitsinunsignedint = (unsigned int)(sizeof(unsigned int) * 8);
    const unsigned int threequarters     = (unsigned int)((bitsinunsignedint * 3) / 4);
    const unsigned int oneeighth         = (unsigned int)(bitsinunsignedint / 8);
    /* Mask selecting the top one-eighth of the word. */
    const unsigned int highbits          = (unsigned int)(0xFFFFFFFF) << (bitsinunsignedint - oneeighth);
    unsigned int hash = 0;
    unsigned int test = 0;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash = (hash << oneeighth) + (*str);
        if ((test = hash & highbits) != 0)
        {
            hash = ((hash ^ (test >> threequarters)) & (~highbits));
        }
    }
    return hash;
}
/* End of P. J. Weinberger hash function */
/*
 * ELF hash: 4-bit shift-and-add; when the top nibble becomes non-zero it is
 * XORed back in 24 bits lower and then cleared.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the hash; the top four bits are always zero.
 */
unsigned int elfhash(char *str, unsigned int len)
{
    unsigned int hash = 0;
    unsigned int x = 0;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash = (hash << 4) + (*str);
        if ((x = hash & 0xF0000000L) != 0)
        {
            hash ^= (x >> 24);
        }
        hash &= ~x;             /* drop the top nibble (no-op when x == 0) */
    }
    return hash;
}
/* End of ELF hash function */
/*
 * BKDR hash: polynomial hash, hash = hash * seed + byte, with seed 131.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the 32-bit hash (wraps modulo 2^32); 0 for an empty input.
 */
unsigned int bkdrhash(char *str, unsigned int len)
{
    unsigned int seed = 131; /* 31 131 1313 13131 131313 etc.. */
    unsigned int hash = 0;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash = (hash * seed) + (*str);
    }
    return hash;
}
/* End of bkdr hash function */
/*
 * SDBM hash: hash = byte + (hash << 6) + (hash << 16) - hash, i.e.
 * hash * 65599 + byte, computed with shifts.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the 32-bit hash (wraps modulo 2^32); 0 for an empty input.
 */
unsigned int sdbmhash(char *str, unsigned int len)
{
    unsigned int hash = 0;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash = (*str) + (hash << 6) + (hash << 16) - hash;
    }
    return hash;
}
/* End of sdbm hash function */
/*
 * DJB hash: hash = hash * 33 + byte (written as a shift plus an add),
 * seeded with 5381.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the 32-bit hash (wraps modulo 2^32); 5381 for an empty input.
 */
unsigned int djbhash(char *str, unsigned int len)
{
    unsigned int hash = 5381;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash = ((hash << 5) + hash) + (*str);
    }
    return hash;
}
/* End of djb hash function */
/*
 * DEK hash: rotate-left-by-5 (built from two shifts) XOR the next byte,
 * seeded with the input length.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume; also the initial hash value
 * Returns the 32-bit hash; `len` itself (0) for an empty input.
 */
unsigned int dekhash(char *str, unsigned int len)
{
    unsigned int hash = len;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash = ((hash << 5) ^ (hash >> 27)) ^ (*str);
    }
    return hash;
}
/* End of Dek hash function */
/*
 * BP hash: shift the running hash left by 7 and XOR in the next byte.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the 32-bit hash; 0 for an empty input. Because each step shifts
 * by 7, only roughly the last five bytes influence the result.
 */
unsigned int bphash(char *str, unsigned int len)
{
    unsigned int hash = 0;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        /* '<<' binds tighter than '^'; parenthesised here for clarity. */
        hash = (hash << 7) ^ (*str);
    }
    return hash;
}
/* End of BP hash function */
/*
 * FNV-style hash: multiply by a constant, then XOR in the next byte.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the 32-bit hash (wraps modulo 2^32); 0 for an empty input.
 *
 * NOTE(review): 0x811C9DC5 is the value usually published as the 32-bit FNV
 * offset basis, not the FNV prime (0x01000193), and the hash starts from 0.
 * Both are kept exactly as in the listing so results stay comparable with
 * the measurements this file was written for.
 */
unsigned int fnvhash(char *str, unsigned int len)
{
    const unsigned int fnv_prime = 0x811C9DC5;
    unsigned int hash = 0;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash *= fnv_prime;
        hash ^= (*str);
    }
    return hash;
}
/* End of FNV hash function */
/*
 * AP hash: alternates between two mixing formulas on even and odd byte
 * positions, seeded with 0xAAAAAAAA.
 *
 * str : pointer to the bytes to hash
 * len : number of bytes to consume
 * Returns the 32-bit hash; the seed itself for an empty input.
 *
 * NOTE(review): the parentheses of the odd-position branch were lost in the
 * listing; it is restored as ~(((hash << 11) + byte) ^ (hash >> 5)), the
 * grouping consistent with the closing-parenthesis count that survived.
 * Confirm against the original source if exact values matter.
 */
unsigned int aphash(char *str, unsigned int len)
{
    unsigned int hash = 0xAAAAAAAA;
    unsigned int i = 0;
    for (i = 0; i < len; str++, i++)
    {
        hash ^= ((i & 1) == 0) ? ((hash << 7) ^ ((*str) * (hash >> 3)))
                               : (~(((hash << 11) + (*str)) ^ (hash >> 5)));
    }
    return hash;
}
/* End of AP hash function */
/* Number of buckets in the table. */
const int num = 1000003; // 10000019

/*
 * One entry of the chained hash table. The bucket array stores head nodes
 * by value, so `flag` records whether a node actually holds an entry
 * (false = empty bucket head); collisions hang off `next`.
 */
struct hashnode {
    unsigned int key;
    unsigned int value;
    hashnode *next;     /* next node in this bucket's chain, or NULL */
    bool flag;          /* true once the node holds a real key/value pair */

    /* Empty node: no entry, no successor. */
    hashnode() {
        flag = false;
        next = NULL;
        key = 0;
        value = 0;
    }

    /* Node carrying key/value; the caller sets `flag` once it is linked in. */
    hashnode(int key, int value) : key(key), value(value) {
        next = NULL;
        flag = false;
    }
};
Hashnode hnode [num];
Unsigned int (* hashvalue) (char *, unsigned INT );
Void sethash (INT choice ){
Switch (choice ){
Case 1: hashvalue = rshash;
Break;
Case 2: hashvalue = jshash;
Break;
Case 3: hashvalue = pjwhash;
Break;
Case 4: hashvalue = elfhash;
Break;
Case 5: hashvalue = bkdrhash;
Break;
Case 6: hashvalue = sdbmhash;
Break;
Case 7: hashvalue = djbhash;
Break;
Case 8: hashvalue = dekhash;
Break;
Case 9: hashvalue = bphash;
Break;
Case 10: hashvalue = fnvhash;
Break;
Case 11: hashvalue = aphash;
Break;
Default:
Break;
}
}
/* Unsigned int hashvalue (char * Str ){
Register unsigned int h;
Register unsigned char * P;
For (H = 0, P = (unsigned char *) STR; * P; P ++ ){
H = (H <5)-H + (* P); // H = 31 * H + * P;
}
Return h;
}*/
Bool iscontainskey (INT key ){
Char TMP [65];
// ITOA (Key, TMP, 10 );
Sprintf (TMP, "% d", key );
Int Len = strlen (TMP );
Unsigned int hashcode = hashvalue (TMP, Len) % num;
If (hnode [hashcode]. Flag = false ){
Return false;
} Else {
Hashnode * P = & hnode [hashcode];
While (P! = NULL ){
If (p-> key = Key ){
Return true;
}
P = p-> next;
}
}
Return false;
}
Void put (INT key, int value ){
Char TMP [65];
// ITOA (Key, TMP, 10 );
Sprintf (TMP, "% d", key );
Int Len = strlen (TMP );
Unsigned int hashcode = hashvalue (TMP, Len) % num;
If (hnode [hashcode]. Flag = false ){
Hnode [hashcode]. Flag = true;
Hnode [hashcode]. Key = key;
Hnode [hashcode]. value = value;
} Else if (! Iscontainskey (key )){
Hashnode * P = new hashnode (Key, value );
P-> flag = true;
P-> next = hnode [hashcode]. Next;
Hnode [hashcode]. Next = P;
} Else {
Hashnode * P = & hnode [hashcode];
While (P! = NULL ){
If (p-> key = Key ){
P-> value = value;
Break;
}
P = p-> next;
}
}
}
Int get (INT key ){
Char TMP [65];
// ITOA (Key, TMP, 10 );
Sprintf (TMP, "% d", key );
Int Len = strlen (TMP );
Unsigned int hashcode = hashvalue (TMP, Len) % num;
If (iscontainskey (key) = false ){
// Alert ("the key not exists ");
Return-1;
} Else {
Hashnode * P = & hnode [hashcode];
While (P! = NULL ){
If (p-> key = Key ){
Return p-> value;
}
P = p-> next;
}
}
Return-1;
}
Int main (){
// Int seed = Time (null );
Cout <"**************** hash function test ******************" <Endl;
Printf ("general purpose hash function algorithms test \ n ");
Printf ("1. RS-Hash function value \ n ");
Printf ("2. js-Hash function value \ n ");
Printf ("3. pjw-Hash function value \ n ");
Printf ("4. Elf-Hash function value \ n ");
Printf ("5. bkdr-Hash function value \ n ");
Printf ("6. sdbm-Hash function value \ n ");
Printf ("7. djb-Hash function value \ n ");
Printf ("8. Dek-Hash function value \ n ");
Printf ("9. BP-Hash function value \ n ");
Printf ("10. FNV-Hash function value \ n ");
Printf ("11. AP-Hash function value \ n ");
String hash_names [] = {"0", "rshash", "jshash", "pjwhash", "elfhash", "bkdrhash", "sdbmhash", "djbhash ", "dekhash", "bphash", "fnvhash", "aphash "};
String count_names [] = {"0", "rshash_count", "jshash_count", "clerk", "elfhash_count", "bkdrhash_count", "sdbmhash_count", "djbhash_count ", "dekhash_count", "bphash_count", "fnvhash_count", "aphash_count "};
For (INT I = 1; I <= 11; I ++ ){
Sethash (I );
File * fp = fopen ("data.txt", "R ");
If (FP = NULL ){
Cout <"end" <Endl;
Exit (-1 );
}
Char key [65];
While (! Feof (FP )){
Fscanf (FP, "% s", key );
// Cout <key <Endl;
// I ++;
Put (ATOL (key), 1 );
}
Fclose (FP );
Fp = fopen (hash_names [I]. c_str (), "W ");
File * fp2 = fopen (count_names [I]. c_str (), "W ");
Int sum = 0;
For (INT I = 0; I <num; I ++ ){
Int COUNT = 0;
Hashnode * P = & hnode [I];
While (P! = NULL & P-> flag! = False ){
Count ++;
Fprintf (FP, "% d-> % d \ n", p-> key, p-> value );
P = p-> next;
}
If (count> 0 ){
Sum + = 1;
}
Fprintf (FP, "***************** \ n ");
If (count> = 1 ){
// Cout <count <Endl;
// Fprintf (fp2, "% d th node has % d collision \ n", I, count );
Fprintf (fp2, "% d \ n", count );
}
}
Cout <count_names [I] <"\ t" <sum <"\ t" <num <Endl;
Fclose (fp2 );
Fclose (FP );
}
Return 0;
}
Result measurement code:
The Python script below generates the random 64-bit numbers used as input (open question: how to guarantee that no random number is repeated — e.g. generate the values by random shuffling, or sort them with bubble sort to detect duplicates).
#!/bin/python
import random
import os


def _write_random_ints(path, count, upper):
    """Write `count` random integers drawn from [0, upper] to `path`, one per line."""
    values = [random.randint(0, upper) for _ in range(count)]
    # `with` guarantees the file is flushed and closed even if a write fails.
    with open(path, "w") as out:
        for value in values:
            out.write(str(value))
            out.write('\n')


# 32 random 64-bit values: rows of the 32x64 bit matrix (first version).
_write_random_ints("matrix.txt", 32, 0xffffffffffffffff)

# NOTE(review): the listing also opened data.txt for reading at this point but
# never used or closed the handle; that unused open is dropped here.

# 64 random 32-bit values: one row per input bit (improved version).
_write_random_ints("lmatrix.txt", 64, 0xffffffff)
Statistical Code:
1. Sorting
# Usage: sh sort.sh INPUT OUTPUT
# Sort INPUT numerically and write "<occurrences> <value>" for each distinct
# line to OUTPUT. Arguments are quoted so paths containing spaces work, and
# sort reads the file directly instead of going through cat.
sort -n "$1" | uniq -c > "$2"
2. Statistics on all files
#!/bin/sh
# Usage: sh <this script> DIR
# Run sort.sh on every file in DIR, writing each result to
# ./tmp_result/<same file name>, and print a running index after each file.
#
# The listing used bash-only arrays and a C-style for loop under a /bin/sh
# shebang and split the output of `ls`; this version is plain POSIX sh and
# copes with file names containing spaces.
dir=$1

# Start from an empty result directory (rm -rf is a no-op if it is absent).
rm -rf tmp_result
mkdir tmp_result

i=0
for path in "$dir"/*; do
    # An unmatched glob stays literal; skip it when DIR is empty.
    [ -e "$path" ] || continue
    name=$(basename "$path")
    sh sort.sh "$path" "./tmp_result/$name"
    echo "$i"
    i=$((i + 1))
done
3. Sum
#!/bin/python
# Sum the integers found one per line in the file named on the command line
# and print the total.
import sys
import os

if len(sys.argv) < 2:
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Python sum. py file")
    sys.exit(0)

total = 0  # renamed from `sum` so the builtin is not shadowed
with open(sys.argv[1], "r") as fp:
    for line in fp:
        text = line.strip()
        if text:  # tolerate blank lines (e.g. a trailing newline)
            total += int(text)
print(total)
Further reference document: http://programmers.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed (very good analysis and many references)
An integer hash algorithm that is very fast and of reasonably acceptable strength: http://burtleburtle.net/bob/hash/integer.html