Reading the memcached source code: its string hash functions, plus a collection of other string hashes

Source: Internet
Author: User
Preface: At the end of my article on the hash table in the memcached source code, I said I would study memcached's string hash functions. This article records the results of that study. Jenkins hash: the Jenkins hash is located in jenkins_hash.c. Little-endian means that the low-order byte is stored at the low memory address.

Preface: At the end of my article on the hash table in the memcached source code, I said I would study memcached's string hash functions. This article records the results of that study. The Jenkins hash is located in jenkins_hash.c. On big-endian versus little-endian: little-endian means that the low-order byte is stored at the low memory address.

Preface

At the end of my article on the hash table in the memcached source code, I said I would study memcached's string hash functions.
This article records the results of that study.

Jenkins hash

The Jenkins hash is located in jenkins_hash.c.

Big-endian and little-endian

Little-endian means the low-order byte is stored at the low memory address and the high-order byte at the high memory address.
Big-endian means the high-order byte is stored at the low memory address and the low-order byte at the high memory address.
For example, the number 0x12345678 is laid out in memory as follows:

1) Big-endian mode:    low address ---------------> high address: 0x12 | 0x34 | 0x56 | 0x78
2) Little-endian mode: low address ---------------> high address: 0x78 | 0x56 | 0x34 | 0x12
/*
 * Derive the hash's endianness switches from the build-time ENDIAN_BIG and
 * ENDIAN_LITTLE settings. When neither is set to 1, both switches are 0.
 */
#if ENDIAN_BIG == 1
#  define HASH_LITTLE_ENDIAN 0
#  define HASH_BIG_ENDIAN 1
#else
#  if ENDIAN_LITTLE == 1
#    define HASH_LITTLE_ENDIAN 1
#    define HASH_BIG_ENDIAN 0
#  else
#    define HASH_LITTLE_ENDIAN 0
#    define HASH_BIG_ENDIAN 0
#  endif
#endif
Rot macro

The first one is the rot macro.
This macro rotates a 32-bit value left by the given number of bits (a circular left shift).

/*
 * rot(x, k): rotate the 32-bit value x left by k bits.
 *
 * x is converted to uint32_t before shifting, so the result is a true 32-bit
 * rotation even when the argument has a wider or a signed type.
 * k must be in the range 1..31: k == 0 would shift right by 32, which is
 * undefined for a 32-bit operand.
 * Both arguments are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define rot(x,k) ((((uint32_t)(x))<<(k)) ^ (((uint32_t)(x))>>(32-(k))))
Mix macro

A reversible mixing step.
The operation is reversible, so any information that was in (a, b, c) before mix() is still in (a, b, c) after mix().

/*
 * mix(a, b, c): mix three 32-bit values in place.
 *
 * a, b and c must be modifiable uint32_t lvalues; each is read and written
 * several times. Every step is a subtract, xor-with-rotation or add, all of
 * which can be undone, so no information in (a, b, c) is lost.
 * Relies on the rot() macro. Use as a statement: mix(a, b, c);
 */
#define mix(a,b,c) \
do { \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
} while (0)
Final macro

Final mixing of three 32-bit values (a, b, c) into c.
That is, a, b, and c are merged into c.

/*
 * final(a, b, c): final mixing of three 32-bit values; the hash result is
 * taken from c afterwards.
 *
 * a, b and c must be modifiable uint32_t lvalues; all three are modified.
 * Relies on the rot() macro. Use as a statement: final(a, b, c);
 */
#define final(a,b,c) \
do { \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
} while (0)
Hash Algorithm

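In the source code there are separate big-endian and little-endian implementations, and each of them branches further on the masks 0x3 and 0x1 (these appear to test the alignment of the key's address). I do not yet understand what that distinction is for.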

/*
 * jenkins_hash: hash `length` bytes starting at `key` into a 32-bit value.
 *
 * The key is consumed one byte at a time, and every 32-bit word is composed
 * most-significant byte first. Full 12-byte blocks are folded in with mix();
 * the last 1..12 bytes are added in the switch below and then scrambled with
 * final(). A zero-length key returns the initial value 0xdeadbeef without
 * any mixing.
 *
 * Relies on the mix() and final() macros.
 */
uint32_t jenkins_hash(const void *key, size_t length) {
    /* Read bytes as unsigned. Through a plain char pointer, a byte >= 0x80
     * is negative where char is signed, and converting it to uint32_t sets
     * the upper 24 bits, corrupting the word it is added to. */
    const uint8_t *k = (const uint8_t *)key;
    uint32_t a, b, c;

    a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;

    /* Strictly greater than 12: the final block (1..12 bytes) is left to
     * the switch so that it goes through final() rather than mix(). */
    while (length > 12) {
        a += ((uint32_t)k[0]) << 24;
        a += ((uint32_t)k[1]) << 16;
        a += ((uint32_t)k[2]) << 8;
        a += ((uint32_t)k[3]);
        b += ((uint32_t)k[4]) << 24;
        b += ((uint32_t)k[5]) << 16;
        b += ((uint32_t)k[6]) << 8;
        b += ((uint32_t)k[7]);
        c += ((uint32_t)k[8]) << 24;
        c += ((uint32_t)k[9]) << 16;
        c += ((uint32_t)k[10]) << 8;
        c += ((uint32_t)k[11]);
        mix(a,b,c);
        length -= 12;
        k += 12;
    }

    /* Every case deliberately falls through to the ones below it. */
    switch (length) {
    case 12: c += k[11];                   /* fall through */
    case 11: c += ((uint32_t)k[10]) << 8;  /* fall through */
    case 10: c += ((uint32_t)k[9]) << 16;  /* fall through */
    case 9 : c += ((uint32_t)k[8]) << 24;  /* fall through */
    case 8 : b += k[7];                    /* fall through */
    case 7 : b += ((uint32_t)k[6]) << 8;   /* fall through */
    case 6 : b += ((uint32_t)k[5]) << 16;  /* fall through */
    case 5 : b += ((uint32_t)k[4]) << 24;  /* fall through */
    case 4 : a += k[3];                    /* fall through */
    case 3 : a += ((uint32_t)k[2]) << 8;   /* fall through */
    case 2 : a += ((uint32_t)k[1]) << 16;  /* fall through */
    case 1 : a += ((uint32_t)k[0]) << 24;
        break;
    case 0 :
        return c;
    }

    final(a,b,c);
    return c;
}

After reading this code, we can shorten it.

/* Load four bytes as one 32-bit word, most-significant byte first. */
static uint32_t jenkins_load_be32(const uint8_t *p) {
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

/*
 * jenkins_hash (shortened form): hash `length` bytes starting at `key` into
 * a 32-bit value.
 *
 * Words are assembled explicitly from bytes, most-significant byte first, so
 * the result does not depend on host byte order or on the alignment of
 * `key`. A zero-length key returns the initial value 0xdeadbeef without any
 * mixing.
 *
 * Relies on the mix() and final() macros.
 */
uint32_t jenkins_hash(const void *key, size_t length) {
    const uint8_t *k = (const uint8_t *)key;
    uint32_t a, b, c;
    size_t i;

    a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;

    /* Strictly greater than 12: a key whose remaining length is exactly 12
     * must be handled by the tail so that it is finished with final(). */
    while (length > 12) {
        a += jenkins_load_be32(k + 0);
        b += jenkins_load_be32(k + 4);
        c += jenkins_load_be32(k + 8);
        mix(a,b,c);
        length -= 12;
        k += 12;
    }

    if (length == 0) {
        return c;
    }

    /* Tail of 1..12 bytes: byte i goes into a (0..3), b (4..7) or c (8..11)
     * at the bit position it would occupy in a big-endian word. */
    for (i = 0; i < length; i++) {
        uint32_t byte = (uint32_t)k[i] << (8 * (3 - (i & 3)));
        if (i < 4) {
            a += byte;
        } else if (i < 8) {
            b += byte;
        } else {
            c += byte;
        }
    }

    final(a,b,c);
    return c;
}
Murmur3 hash

The location of murmur3 hash is in murmur3_hash.c.

/* Ask the compiler to always inline the small helpers (GCC/Clang syntax). */
#define FORCE_INLINE inline __attribute__((always_inline))

/* Rotate x left by r bits; r must be in the range 1..31. */
static inline uint32_t ROTL32(uint32_t x, int8_t r) {
    return (x << r) | (x >> (32 - r));
}

/*
 * Return the 32-bit block at index i relative to p. i may be negative.
 * No bounds checking is done.
 * NOTE(review): the block is read in host byte order through a uint32_t
 * pointer; this assumes the platform tolerates such reads at the key's
 * alignment -- confirm for the target.
 */
static FORCE_INLINE uint32_t getblock32(const uint32_t *p, int i) {
    return p[i];
}

/* Finalization mix: xor-shift and multiply steps applied to the hash state. */
static FORCE_INLINE uint32_t fmix32(uint32_t h) {
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
}

/*
 * 32-bit MurmurHash3 of `length` bytes at `key`, starting from a hash state
 * of 0 (no seed parameter). Named as in the upstream murmur3_hash.c.
 */
uint32_t MurmurHash3_x86_32(const void *key, size_t length) {
    const uint8_t *data = (const uint8_t *)key;
    const int nblocks = length / 4;

    uint32_t h1 = 0;

    uint32_t c1 = 0xcc9e2d51;
    uint32_t c2 = 0x1b873593;

    /* Body: `blocks` points just past the last full 4-byte block, so the
     * loop indexes backwards from -nblocks up to -1. */
    const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);

    for (int i = -nblocks; i; i++) {
        uint32_t k1 = getblock32(blocks, i);

        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;

        h1 ^= k1;
        h1 = ROTL32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    /* Tail: the 0..3 bytes left after the last full block. */
    const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);

    uint32_t k1 = 0;

    switch (length & 3) {
    case 3: k1 ^= tail[2] << 16;  /* fall through */
    case 2: k1 ^= tail[1] << 8;   /* fall through */
    case 1: k1 ^= tail[0];
        k1 *= c1;
        k1 = ROTL32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
    }

    /* Finalization: fold in the length, then scramble the state. */
    h1 ^= length;
    h1 = fmix32(h1);

    return h1;
}
Additive Hash
/*
 * Additive hash: start from the key length, add every key byte, and reduce
 * the sum modulo `prime`.
 * NOTE(review): the loop body was lost in this copy of the article and has
 * been restored from the published form of this hash -- confirm against the
 * original article.
 */
ub4 additive(char *key, ub4 len, ub4 prime)
{
    ub4 hash, i;
    for (hash = len, i = 0; i < len; ++i)
        hash += key[i];
    return (hash % prime);
}
 
  
Rotating Hash
  
/*
 * Rotating hash: start from the key length; for every key byte, combine the
 * hash shifted left by 4, the hash shifted right by 28, and the byte with
 * xor. The result is reduced modulo `prime`.
 * NOTE(review): the shift pair forms a 4-bit rotation only if ub4 is a
 * 32-bit unsigned type; ub4 is not defined in this snippet.
 */
ub4 rotating(char *key, ub4 len, ub4 prime)
{
    ub4 hash, i;
    for (hash = len, i = 0; i < len; ++i)
        hash = (hash << 4) ^ (hash >> 28) ^ key[i];
    return (hash % prime);
}
   
One-at-a-Time Hash
/*
 * One-at-a-Time hash: each key byte is added to the hash and followed by a
 * shift-add and a shift-xor step; three more shift steps finish the hash
 * once all bytes are consumed.
 * NOTE(review): `mask` is not a parameter and is not defined in this
 * snippet; it must be supplied by the surrounding code.
 */
ub4 one_at_a_time(char *key, ub4 len)
{
    ub4 hash, i;
    for (hash = 0, i = 0; i < len; ++i) {
        hash += key[i];
        hash += (hash << 10);
        hash ^= (hash >> 6);
    }
    hash += (hash << 3);
    hash ^= (hash >> 11);
    hash += (hash << 15);
    return (hash & mask);
}
   
Bernstein hash
/*
 * Bernstein hash: starting from `level`, multiply the hash by 33 and add the
 * next key byte, for every byte of the key.
 * NOTE(review): the loop body was lost in this copy of the article and has
 * been restored from the published form of this hash -- confirm against the
 * original article.
 */
ub4 bernstein(ub1 *key, ub4 len, ub4 level)
{
    ub4 hash = level;
    ub4 i;
    for (i = 0; i < len; ++i)
        hash = 33 * hash + key[i];
    return hash;
}
   
    
Goulburn Hash
    
/*
 * Goulburn hash: for every key byte, add a table entry selected by the byte,
 * xor in two shifted copies of the hash, add a second table entry selected
 * by the top 7 bits of the hash, xor in two more shifted copies, and add a
 * constant. `last_value` is the starting hash state.
 * NOTE(review): g_table0 and g_table1 are not defined in this snippet. The
 * g_table0 lookup sits in the part of the loop that was lost in this copy
 * and has been restored from the published form -- confirm against the
 * original article.
 */
u4 goulburn(const unsigned char *cp, size_t len, uint32_t last_value)
{
    register u4 h = last_value;
    int u;
    for (u = 0; u < len; ++u) {
        h += g_table0[ cp[u] ];
        h ^= (h << 3) ^ (h >> 29);
        h += g_table1[ h >> 25 ];
        h ^= (h << 14) ^ (h >> 18);
        h += 1783936964UL;
    }
    return h;
}
     
Murmur Hash

/*
 * MurmurHash1: hash `len` bytes at `key` into a 32-bit value, starting from
 * `seed`.
 *
 * The key is consumed four bytes at a time as a host-order unsigned int; the
 * remaining 0..3 bytes are folded in by the switch, and a few extra
 * multiply / xor-shift rounds finish the hash.
 * NOTE(review): the 4-byte read goes through an unsigned int pointer; this
 * assumes the platform tolerates such reads at the key's alignment.
 */
uint32_t MurmurHash1(const void *key, int len, uint32_t seed)
{
    const unsigned int m = 0xc6a4a793;

    const int r = 16;

    unsigned int h = seed ^ (len * m);

    /* Mix four bytes at a time into the hash. */
    const unsigned char *data = (const unsigned char *)key;

    while (len >= 4) {
        unsigned int k = *(const unsigned int *)data;

        h += k;
        h *= m;
        h ^= h >> 16;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes; the cases fall through on purpose. */
    switch (len) {
    case 3:
        h += data[2] << 16;  /* fall through */
    case 2:
        h += data[1] << 8;   /* fall through */
    case 1:
        h += data[0];
        h *= m;
        h ^= h >> r;
    };

    /* A few final mixes of the hash. */
    h *= m;
    h ^= h >> 10;
    h *= m;
    h ^= h >> 17;

    return h;
}

Pearson Hash
// This preinitializes tab [] to an arbitrary permutation of 0 .. 255. char pearson (char * key, ub4 len, char tab [256]) {char hash; ub4 I; for (hash = len, I = 0; I
     
      
CRC Hashing
      
/*
 * CRC hashing: for every key byte, shift the hash right by 8 and xor in the
 * table entry selected by the low hash byte xor the key byte. The result is
 * masked with `mask`.
 * The key byte is converted to unsigned char so that the table index stays
 * within 0 .. 255, whatever the signedness of plain char.
 */
ub4 crc(char *key, ub4 len, ub4 mask, ub4 tab[256])
{
    ub4 hash, i;
    for (hash = len, i = 0; i < len; ++i)
        hash = (hash >> 8) ^ tab[(hash & 0xff) ^ (unsigned char)key[i]];
    return (hash & mask);
}
       
Generalized CRC Hashing
//The size of tab[] is the maximum number of input bits. //Values in tab[] are chosen at random. ub4 universal(char *key, ub4 len, ub4 mask, ub4 tab[MAXBITS]){    ub4 hash, i;    for (hash=len, i=0; i<(len<<3); i+=8){        register char k = key[i>>3];        if (k&0x01) hash ^= tab[i+0];        if (k&0x02) hash ^= tab[i+1];        if (k&0x04) hash ^= tab[i+2];        if (k&0x08) hash ^= tab[i+3];        if (k&0x10) hash ^= tab[i+4];        if (k&0x20) hash ^= tab[i+5];        if (k&0x40) hash ^= tab[i+6];        if (k&0x80) hash ^= tab[i+7];    }    return (hash & mask);}
Zobrist Hashing
/*
 * Zobrist hashing: xor together one table entry per key byte, selected by
 * the byte's position and its value. The result is masked with `mask`.
 * The key byte is converted to unsigned char so that the second index stays
 * within 0 .. 255, whatever the signedness of plain char.
 * NOTE(review): the loop body was lost in this copy of the article and has
 * been restored from the published form of this hash -- confirm against the
 * original article.
 */
ub4 zobrist(char *key, ub4 len, ub4 mask, ub4 tab[MAXBYTES][256])
{
    ub4 hash, i;
    for (hash = len, i = 0; i < len; ++i)
        hash ^= tab[i][(unsigned char)key[i]];
    return (hash & mask);
}
           
        

This article from: http://tiankonguse.github.io, original address: http://github.tiankonguse.com//blog/2014/11/07/memcached-string-hash/, thanks to the original author to share.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.