Preface: at the end of my article on the hash table in the memcached source-code reading series, I said I would study memcached's string hash functions. This post records the results of that study. Jenkins hash: the Jenkins hash is located in jenkins_hash.c. Little-endian means that the low-order byte is stored at the low memory address.
Preface: at the end of the hash table article in my memcached source-code reading series, I said I would study memcached's string hash functions. This post records the results of that study. Jenkins hash: the Jenkins hash is located in jenkins_hash.c. Big-endian and little-endian: little-endian means that the low-order bytes are stored at the low end of the memory addresses.
Preface
At the end of the hash table article in my memcached source-code reading series, I said I would study memcached's string hash functions.
This post records the results of that study.
Jenkins hash
memcached's Jenkins hash implementation is located in jenkins_hash.c.
Big-endian and little-endian
Little-endian stores the low-order byte at the low memory address and the high-order byte at the high memory address.
Big-endian stores the high-order byte at the low memory address and the low-order byte at the high memory address.
For example, the number 0x12345678 is laid out in memory as follows:
1) Big-endian mode: low address ---------------> high address: 0x12 | 0x34 | 0x56 | 0x78
2) Little-endian mode: low address ---------------> high address: 0x78 | 0x56 | 0x34 | 0x12
/*
 * Derive HASH_BIG_ENDIAN and HASH_LITTLE_ENDIAN from ENDIAN_BIG and
 * ENDIAN_LITTLE. At most one of the two flags is 1; both are 0 when neither
 * ENDIAN_BIG nor ENDIAN_LITTLE is set to 1.
 *
 * Each preprocessor directive must sit on its own line.
 */
#if ENDIAN_BIG == 1
#  define HASH_LITTLE_ENDIAN 0
#  define HASH_BIG_ENDIAN 1
#else
#  if ENDIAN_LITTLE == 1
#    define HASH_LITTLE_ENDIAN 1
#    define HASH_BIG_ENDIAN 0
#  else
#    define HASH_LITTLE_ENDIAN 0
#    define HASH_BIG_ENDIAN 0
#  endif
#endif
Rot macro
The first one is the rot macro.
This macro rotates x left by k bits, i.e. it performs a circular left shift on a 32-bit value.
/*
 * rot(word, bits): rotate `word` left by `bits` positions within 32 bits.
 * For a 32-bit unsigned `word` and 0 < bits < 32 the two shifted parts never
 * share a set bit, so XOR simply joins them.
 * Both arguments are expanded more than once: pass side-effect-free
 * expressions only.
 */
#define rot(word, bits) (((word) << (bits)) ^ ((word) >> (32 - (bits))))
Mix macro
A reversible mixing step.
mix() is reversible, so any information in (a, b, c) before mix() is still in (a, b, c) after mix().
/*
 * mix(a, b, c): mix three 32-bit values in place.
 *
 * Every step only adds, subtracts, or XORs one variable with a value derived
 * from the other two, which is what makes the whole sequence reversible.
 * Expands to a brace-enclosed block that uses the rot() macro; a, b and c
 * must be modifiable 32-bit unsigned lvalues.
 *
 * Each backslash must be the last character on its line (line continuation).
 */
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}
Final macro
Final mixing of three 32-bit values (a, b, c) into c.
In other words, it merges a, b, and c into c.
/*
 * final(a, b, c): final mixing of three 32-bit values; the result to use is
 * left in c.
 *
 * Expands to a brace-enclosed block that uses the rot() macro; a, b and c
 * must be modifiable 32-bit unsigned lvalues.
 *
 * Each backslash must be the last character on its line (line continuation).
 */
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}
Hash Algorithm
The source code has separate paths for big-endian and little-endian machines, and within each it branches again on a test against 0x3 or 0x1. I do not yet know what those branches are for.
/*
 * jenkins_hash - hash `length` bytes starting at `key` to a 32-bit value.
 *
 * Byte-at-a-time variant: each 32-bit word is assembled from four key bytes
 * with the first byte in the most significant position, so no word-sized
 * loads from the key are performed. Relies on the mix() and final() macros.
 *
 * An empty key returns the initial value of c without calling final().
 */
uint32_t jenkins_hash(const void *key, size_t length)
{
    uint32_t a, b, c;
    /*
     * Read the key as unsigned bytes. Through a plain char pointer a byte
     * >= 0x80 is negative where char is signed, and the uint32_t casts below
     * would sign-extend it, setting bits above the byte's shifted position.
     */
    const uint8_t *k = (const uint8_t *)key;

    a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;

    /* Consume the key 12 bytes per round, leaving the last 1..12 bytes. */
    while (length > 12) {
        a += ((uint32_t)k[0]) << 24;
        a += ((uint32_t)k[1]) << 16;
        a += ((uint32_t)k[2]) << 8;
        a += ((uint32_t)k[3]);
        b += ((uint32_t)k[4]) << 24;
        b += ((uint32_t)k[5]) << 16;
        b += ((uint32_t)k[6]) << 8;
        b += ((uint32_t)k[7]);
        c += ((uint32_t)k[8]) << 24;
        c += ((uint32_t)k[9]) << 16;
        c += ((uint32_t)k[10]) << 8;
        c += ((uint32_t)k[11]);
        mix(a, b, c);
        length -= 12;
        k += 12;
    }

    /* Last block: every case deliberately falls through to the ones below. */
    switch (length) {
    case 12: c += k[11];                       /* fallthrough */
    case 11: c += ((uint32_t)k[10]) << 8;      /* fallthrough */
    case 10: c += ((uint32_t)k[9]) << 16;      /* fallthrough */
    case 9:  c += ((uint32_t)k[8]) << 24;      /* fallthrough */
    case 8:  b += k[7];                        /* fallthrough */
    case 7:  b += ((uint32_t)k[6]) << 8;       /* fallthrough */
    case 6:  b += ((uint32_t)k[5]) << 16;      /* fallthrough */
    case 5:  b += ((uint32_t)k[4]) << 24;      /* fallthrough */
    case 4:  a += k[3];                        /* fallthrough */
    case 3:  a += ((uint32_t)k[2]) << 8;       /* fallthrough */
    case 2:  a += ((uint32_t)k[1]) << 16;      /* fallthrough */
    case 1:  a += ((uint32_t)k[0]) << 24;
             break;
    case 0:  return c;                         /* empty key: nothing to add */
    }

    final(a, b, c);
    return c;
}
Once this code is understood, it can be written more compactly:
/* Assemble a 32-bit word from 4 bytes, first byte most significant. */
static uint32_t jenkins_be32(const uint8_t *p)
{
    return (((uint32_t)p[0]) << 24) | (((uint32_t)p[1]) << 16) |
           (((uint32_t)p[2]) << 8) | ((uint32_t)p[3]);
}

/*
 * jenkins_hash - compact form of the byte-at-a-time Jenkins hash.
 *
 * Hashes `length` bytes starting at `key` to a 32-bit value using the mix()
 * and final() macros. Whole words are built with jenkins_be32() rather than
 * by dereferencing a uint32_t pointer into the key: a direct load would
 * depend on host byte order and on the key's alignment, whereas the
 * byte-wise composition always puts the first byte in the top position.
 *
 * An empty key returns the initial value of c without calling final().
 */
uint32_t jenkins_hash(const void *key, size_t length)
{
    uint32_t a, b, c;
    /* Unsigned bytes, so that values >= 0x80 are not sign-extended. */
    const uint8_t *k = (const uint8_t *)key;

    a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;

    /*
     * Strictly greater than 12: the last 1..12 bytes are left for the
     * switch below so that they are followed by final(), not by mix().
     */
    while (length > 12) {
        a += jenkins_be32(k + 0);
        b += jenkins_be32(k + 4);
        c += jenkins_be32(k + 8);
        mix(a, b, c);
        length -= 12;
        k += 12;
    }

    if (length == 0) {
        return c;
    }

    switch (length) {
    case 12:
        c += jenkins_be32(k + 8);
        b += jenkins_be32(k + 4);
        a += jenkins_be32(k + 0);
        break;
    case 11: c += ((uint32_t)k[10]) << 8;      /* fallthrough */
    case 10: c += ((uint32_t)k[9]) << 16;      /* fallthrough */
    case 9:  c += ((uint32_t)k[8]) << 24;      /* fallthrough */
    case 8:
        b += jenkins_be32(k + 4);
        a += jenkins_be32(k + 0);
        break;
    case 7:  b += ((uint32_t)k[6]) << 8;       /* fallthrough */
    case 6:  b += ((uint32_t)k[5]) << 16;      /* fallthrough */
    case 5:  b += ((uint32_t)k[4]) << 24;      /* fallthrough */
    case 4:
        a += jenkins_be32(k + 0);
        break;
    case 3:  a += ((uint32_t)k[2]) << 8;       /* fallthrough */
    case 2:  a += ((uint32_t)k[1]) << 16;      /* fallthrough */
    case 1:  a += ((uint32_t)k[0]) << 24;
        break;
    default:
        break;
    }

    final(a, b, c);
    return c;
}
Murmur3 hash
memcached's MurmurHash3 implementation is located in murmur3_hash.c.
// No bounds checking is done on the data here; the code only needs to
// produce well-scattered ("random-looking") numbers.
#define FORCE_INLINE inline __attribute__((always_inline))

// Rotate x left by r bits (the callers below pass r = 15 and r = 13).
static inline uint32_t ROTL32(uint32_t x, int8_t r)
{
    return (x << r) | (x >> (32 - r));
}

// Read the 32-bit block at index i relative to p; i may be negative.
static FORCE_INLINE uint32_t getblock32(const uint32_t *p, int i)
{
    return p[i];
}

// Finalization mix: alternating xor-shifts and multiplications.
static FORCE_INLINE uint32_t fmix32(uint32_t h)
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
}

// 32-bit MurmurHash3 of `length` bytes at `key`, with a fixed seed of 0.
// NOTE(review): the reference implementation calls this routine
// MurmurHash3_x86_32; the name used in this article is kept unchanged.
uint32_t forward(const void *key, size_t length)
{
    const uint8_t *data = (const uint8_t *)key;
    const int nblocks = length / 4;

    uint32_t h1 = 0;

    uint32_t c1 = 0xcc9e2d51;
    uint32_t c2 = 0x1b873593;

    // Body: `blocks` points just past the last whole 4-byte block, and the
    // loop walks up to it using negative indices.
    // NOTE(review): blocks are read through a uint32_t pointer, so the value
    // depends on host byte order and the key is assumed to be readable that
    // way (alignment) on the target.
    const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);

    for (int i = -nblocks; i; i++) {
        uint32_t k1 = getblock32(blocks, i);

        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;

        h1 ^= k1;
        h1 = ROTL32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    // Tail: the remaining 0-3 bytes; each case falls through to the next.
    const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);

    uint32_t k1 = 0;

    switch (length & 3) {
    case 3: k1 ^= tail[2] << 16;   /* fallthrough */
    case 2: k1 ^= tail[1] << 8;    /* fallthrough */
    case 1: k1 ^= tail[0];
            k1 *= c1;
            k1 = ROTL32(k1, 15);
            k1 *= c2;
            h1 ^= k1;
    };

    // Finalization.
    h1 ^= length;

    h1 = fmix32(h1);

    return h1;
}
Additive Hash
/*
 * Additive hash: start from the key length, add every key byte, and reduce
 * the sum modulo `prime`.
 */
ub4 additive(char *key, ub4 len, ub4 prime)
{
    ub4 hash, i;

    for (hash = len, i = 0; i < len; ++i) {
        hash += key[i];
    }
    return (hash % prime);
}
Rotating Hash
/*
 * Rotating hash: start from the key length; for every key byte, combine the
 * hash shifted left by 4, the hash shifted right by 28, and the byte with
 * XOR. The result is reduced modulo `prime`.
 */
ub4 rotating(char *key, ub4 len, ub4 prime)
{
    ub4 hash, i;

    for (hash = len, i = 0; i < len; ++i) {
        hash = (hash << 4) ^ (hash >> 28) ^ key[i];
    }
    return (hash % prime);
}
One-at-a-Time Hash
/*
 * One-at-a-Time hash: each key byte is added to the hash, followed by a
 * shift-add and a shift-xor; three more shift steps finish the value.
 *
 * NOTE(review): `mask` is not a parameter and is not declared in this
 * snippet; presumably it is defined at file scope. Confirm before use.
 */
ub4 one_at_a_time(char *key, ub4 len)
{
    ub4 hash, i;

    for (hash = 0, i = 0; i < len; ++i) {
        hash += key[i];
        hash += (hash << 10);
        hash ^= (hash >> 6);
    }
    hash += (hash << 3);
    hash ^= (hash >> 11);
    hash += (hash << 15);
    return (hash & mask);
}
Bernstein hash
/*
 * Bernstein hash: starting from `level`, multiply the hash by 33 and add the
 * next key byte, for every byte of the key.
 */
ub4 bernstein(ub1 *key, ub4 len, ub4 level)
{
    ub4 hash = level;
    ub4 i;

    for (i = 0; i < len; ++i) {
        hash = 33 * hash + key[i];
    }
    return hash;
}
Goulburn Hash
u4 goulburn( const unsigned char *cp, size_t len, uint32_t last_value){ register u4 h = last_value; int u; for( u=0; u
> 29); h += g_table1[ h >> 25 ]; h ^= (h << 14) ^ (h >> 18); h += 1783936964UL; } return h;}
Murmur Hash
/*
 * MurmurHash1 - hash `len` bytes at `key`, seeded with `seed`.
 *
 * The main loop consumes the key four bytes at a time, the switch folds in
 * the remaining 0-3 bytes, and two multiply / xor-shift rounds finish the
 * value.
 */
uint32_t MurmurHash1(const void *key, int len, uint32_t seed)
{
    const unsigned int m = 0xc6a4a793;
    const int r = 16;

    unsigned int h = seed ^ (len * m);

    //----------

    const unsigned char *data = (const unsigned char *)key;

    while (len >= 4) {
        /*
         * NOTE(review): the key is read through an unsigned int pointer, so
         * the value depends on host byte order and the load is assumed to be
         * acceptable (alignment) on the target.
         */
        unsigned int k = *(unsigned int *)data;

        h += k;
        h *= m;
        h ^= h >> 16;

        data += 4;
        len -= 4;
    }

    //----------

    /* Remaining bytes: each case falls through to the next. */
    switch (len) {
    case 3: h += data[2] << 16;    /* fallthrough */
    case 2: h += data[1] << 8;     /* fallthrough */
    case 1: h += data[0];
            h *= m;
            h ^= h >> r;
    };

    //----------

    h *= m;
    h ^= h >> 10;
    h *= m;
    h ^= h >> 17;

    return h;
}
Pearson Hash
// This preinitializes tab [] to an arbitrary permutation of 0 .. 255. char pearson (char * key, ub4 len, char tab [256]) {char hash; ub4 I; for (hash = len, I = 0; I
CRC Hashing
/*
 * CRC-style hash: start from the key length; for every key byte, shift the
 * hash right by 8 and XOR in the table entry selected by the low hash byte
 * XOR the key byte. The result is masked with `mask`.
 *
 * NOTE(review): key[i] is a plain char; where char is signed, a byte >= 0x80
 * yields an index outside 0..255. Confirm how callers guard against this.
 */
ub4 crc(char *key, ub4 len, ub4 mask, ub4 tab[256])
{
    ub4 hash, i;

    for (hash = len, i = 0; i < len; ++i) {
        hash = (hash >> 8) ^ tab[(hash & 0xff) ^ key[i]];
    }
    return (hash & mask);
}
Generalized CRC Hashing
//The size of tab[] is the maximum number of input bits. //Values in tab[] are chosen at random. ub4 universal(char *key, ub4 len, ub4 mask, ub4 tab[MAXBITS]){ ub4 hash, i; for (hash=len, i=0; i<(len<<3); i+=8){ register char k = key[i>>3]; if (k&0x01) hash ^= tab[i+0]; if (k&0x02) hash ^= tab[i+1]; if (k&0x04) hash ^= tab[i+2]; if (k&0x08) hash ^= tab[i+3]; if (k&0x10) hash ^= tab[i+4]; if (k&0x20) hash ^= tab[i+5]; if (k&0x40) hash ^= tab[i+6]; if (k&0x80) hash ^= tab[i+7]; } return (hash & mask);}
Zobrist Hashing
/*
 * Zobrist hash: start from the key length and, for every position i, XOR in
 * the table entry selected by the position and the byte at that position.
 * The result is masked with `mask`.
 *
 * NOTE(review): key[i] is a plain char; where char is signed, a byte >= 0x80
 * yields a negative column index. Confirm how callers guard against this.
 */
ub4 zobrist(char *key, ub4 len, ub4 mask, ub4 tab[MAXBYTES][256])
{
    ub4 hash, i;

    for (hash = len, i = 0; i < len; ++i) {
        hash ^= tab[i][key[i]];
    }
    return (hash & mask);
}
This article is from http://tiankonguse.github.io; original address: http://github.tiankonguse.com//blog/2014/11/07/memcached-string-hash/. Thanks to the original author for sharing.