I. Overview
The hash table ADT supports only a subset of the operations allowed by a binary search tree. The implementation of a hash table is usually called hashing. Hashing is a technique for performing insertions, deletions, and lookups in constant average time. However, operations that require any ordering information among the elements are not supported efficiently.
II. Implementation
The ideal hash table data structure is nothing more than a fixed-size array containing keys. Typically, a key is a string with an associated value. We refer to the size of the table as TableSize and treat it as part of the hash data structure rather than as a global variable. By convention, the table indices run from 0 to TableSize-1. Each key is mapped to a number in the range 0 to TableSize-1 and is placed in the corresponding cell. This mapping is called a hash function; ideally it should be simple to compute and should ensure that any two distinct keys map to different cells. However, this is impossible, because the number of cells is finite while the supply of keys is virtually inexhaustible. So instead we look for a hash function that distributes the keys evenly among the cells.
This is the basic idea of hashing. The remaining problems are to choose a hash function, to decide what to do when two keys hash to the same value (called a collision), and to determine the size of the hash table.
If the input keys are integers, then simply returning Key mod TableSize is generally a reasonable strategy, unless the keys happen to have some undesirable properties. In that case, the choice of the hash function needs careful consideration.
There are several ways to resolve collisions; we will discuss two of the simplest: separate chaining and open addressing.
1. Separate chaining
The approach is to keep all elements that hash to the same value in a list. For convenience, the lists have headers (the table size in the figure is not prime; it is used here only for simplicity), as shown in Figure 1.
Figure 1: A separate chaining hash table
FileName: hashsep.h
#ifndef HASHSEP_H
#define HASHSEP_H

/*
 * Interface for a separate chaining hash table.
 *
 * The table and its list nodes are opaque: clients only hold the
 * HashTable and Position handles declared here.
 */

typedef int ElementType;        /* type of the keys stored in the table */
typedef unsigned int Index;     /* slot number produced by Hash()       */

struct ListNode;
typedef struct ListNode *Position;
struct HASHTBL;
typedef struct HASHTBL *HashTable;

/* Map Key to a slot number for a table with tablesize slots. */
Index Hash(const int Key, int tablesize);

/* Create an empty table with at least tablesize slots. */
HashTable initializetable(int tablesize);

/* Release every node, the slot array and the table itself. */
void Destroytable(HashTable H);

/* Return the node that holds Key, or NULL when Key is not stored. */
Position Find(ElementType Key, HashTable H);

/* Add Key to the table; a key that is already present is left alone. */
void Insert(ElementType Key, HashTable H);

/* Return the key stored in node P. */
ElementType Retrieve(Position P);

/* Routines such as Delete and MakeEmpty are omitted */

#endif /* HASHSEP_H */
FileName: hashsep.c
#include "fatal.h"
#include "hashsep.h"
#include <stdlib.h>

/*
 * Smallest table size accepted by initializetable().
 * NOTE(review): the literal was garbled in the article; 10 is assumed.
 */
#define MinTableSize (10)

typedef Position List;

/*
 * Hash function for ints: Key modulo tablesize.
 *
 * In C the remainder takes the sign of the dividend, so a negative Key
 * would yield a negative value that wraps to a huge unsigned Index.
 * The result is therefore folded back into 0 .. tablesize-1.
 * tablesize must be positive.
 */
Index Hash(const int Key, int tablesize)
{
    int Slot = Key % tablesize;

    if (Slot < 0) {
        Slot += tablesize;
    }
    return (Index)Slot;
}

/* One node of a chain; the first node of every chain is a header. */
struct ListNode {
    ElementType Element;
    Position Next;
};

/* TheLists is an array of lists, allocated later. */
/* The lists use headers (for simplicity), */
/* though this wastes space. */
struct HASHTBL {
    int TableSize;
    List *TheLists;
};

/* Return the smallest prime that is >= N; assumes N >= MinTableSize. */
static int nextprime(int N)
{
    int i;

    if (N % 2 == 0) {
        N++;
    }
    for (;; N += 2) {
        for (i = 3; i * i <= N; i += 2) {
            if (N % i == 0) {
                goto ContOuter; /* Sorry about this! */
            }
        }
        return N;
    ContOuter:;
    }
}

/*
 * Create an empty table.
 *
 * The requested size is rounded up to the next prime. Every slot gets
 * its own header node, so Find and Insert never have to special-case
 * an empty chain. Sizes below MinTableSize are reported through
 * Error(); allocation failures are reported through FatalError().
 */
HashTable initializetable(int tablesize)
{
    HashTable H;
    int i;

    if (tablesize < MinTableSize) {
        Error("Table size too small");
        return NULL;
    }

    /* Allocate table */
    H = malloc(sizeof *H);
    if (H == NULL) {
        FatalError("Out of space!!!");
    }

    H->TableSize = nextprime(tablesize);

    /* Allocate array of lists */
    H->TheLists = malloc(sizeof *H->TheLists * (size_t)H->TableSize);
    if (H->TheLists == NULL) {
        FatalError("Out of space!!!");
    }

    /* Allocate list headers */
    for (i = 0; i < H->TableSize; i++) {
        H->TheLists[i] = malloc(sizeof *H->TheLists[i]);
        if (H->TheLists[i] == NULL) {
            FatalError("Out of space!!!");
        } else {
            H->TheLists[i]->Next = NULL;
        }
    }

    return H;
}

/* Return the node that holds Key, or NULL when Key is not in the table. */
Position Find(ElementType Key, HashTable H)
{
    List L = H->TheLists[Hash(Key, H->TableSize)];
    Position P = L->Next;       /* skip the header node */

    while (P != NULL && P->Element != Key) { /* Probably need strcmp!! */
        P = P->Next;
    }
    return P;
}

/*
 * Insert Key at the front of its chain.
 * Nothing happens when Key is already stored.
 */
void Insert(ElementType Key, HashTable H)
{
    Position NewCell;
    List L;

    if (Find(Key, H) != NULL) {
        return;                 /* Key is already present */
    }

    NewCell = malloc(sizeof *NewCell);
    if (NewCell == NULL) {
        FatalError("Out of space!!!");
    } else {
        L = H->TheLists[Hash(Key, H->TableSize)];
        NewCell->Next = L->Next;
        NewCell->Element = Key; /* Probably need strcpy! */
        L->Next = NewCell;
    }
}

/* Free every chain (headers included), the slot array and the table. */
void Destroytable(HashTable H)
{
    int i;

    if (H == NULL) {
        return;
    }

    for (i = 0; i < H->TableSize; i++) {
        Position P = H->TheLists[i];
        Position Tmp;

        while (P != NULL) {
            Tmp = P->Next;      /* save the link before the node is freed */
            free(P);
            P = Tmp;
        }
    }

    free(H->TheLists);
    free(H);
}

/* Return the key stored in node P; P must not be NULL. */
ElementType Retrieve(Position P)
{
    return P->Element;
}
FileName: main.c
#include "hashsep.h"
#include <stdio.h>
/*
 * Demo for the separate chaining table: insert the first few perfect
 * squares and print each key next to Hash(key, TABLE_SIZE).
 *
 * NOTE(review): the table-size argument and the loop bound were lost in
 * the article text. 10 is taken from the surviving "ten" in the Hash
 * call; KEY_COUNT = 10 is an assumption - confirm against the original.
 * The printed slot uses TABLE_SIZE, whereas the table itself rounds its
 * size up to a prime, so the printed value is illustrative only.
 */
int main(void)
{
    enum { TABLE_SIZE = 10, KEY_COUNT = 10 };
    HashTable H = initializetable(TABLE_SIZE);
    int i;

    if (H == NULL) {
        return 1;
    }

    printf("hashtable:\n");
    for (i = 1; i <= KEY_COUNT; i++) {
        Insert(i * i, H);
        /* Hash() returns an unsigned Index, hence %u */
        printf("%d:%u\n", i * i, Hash(i * i, TABLE_SIZE));
    }

    Destroytable(H);
    return 0;
}
2. Open addressing
The disadvantage of separate chaining hashing is that it requires pointers. Because allocating new cells takes time, the algorithm is slowed down somewhat, and it effectively requires the implementation of a second data structure. Open addressing hashing is an alternative that resolves collisions without linked lists. In an open addressing hashing system, if a collision occurs, alternative cells are tried until an empty cell is found.
FileName: hashquad.h
#ifndef HASHQUAD_H
#define HASHQUAD_H

/*
 * Interface for an open addressing hash table.
 *
 * A Position is the index of a cell inside the table, so Retrieve()
 * needs the table as well as the position.
 *
 * The prime-search helper is private to hashquad.c and is deliberately
 * not declared here: a static prototype in a public header would give
 * every includer an undefined static function.
 */

typedef int ElementType;        /* type of the keys stored in the table */
typedef unsigned int Index;     /* slot number produced by Hash()       */
typedef Index Position;

struct HASHTBL;
typedef struct HASHTBL *HashTable;

/* Map Key to a slot number for a table with tablesize slots. */
Index Hash(ElementType Key, int tablesize);

/* Create an empty table with at least tablesize cells. */
HashTable initializetable(int tablesize);

/* Release the cell array and the table itself. */
void Destroytable(HashTable H);

/* Return the cell that holds Key, or the empty cell where it belongs. */
Position Find(ElementType Key, HashTable H);

/* Store Key in the table; a key that is already present is left alone. */
void Insert(ElementType Key, HashTable H);

/* Return the key stored in cell P of table H. */
ElementType Retrieve(Position P, HashTable H);

/* Build a table of roughly twice the size and move every key into it. */
HashTable Rehash(HashTable H);

/* Routines such as Delete and MakeEmpty are omitted */

#endif /* HASHQUAD_H */
FileName: hashquad.c
#include "hashquad.h"
#include "fatal.h"
#include <stdlib.h>

/*
 * Smallest table size accepted by initializetable().
 * NOTE(review): the literal was garbled in the article; 10 is assumed.
 */
#define MinTableSize (10)

/* State of a cell. Deleted is unused here because Delete is omitted. */
enum KindOfEntry { Legitimate, Empty, Deleted };

struct HashEntry {
    ElementType Element;
    enum KindOfEntry Info;
};

typedef struct HashEntry Cell;

/* TheCells is an array of HashEntry cells, allocated later. */
struct HASHTBL {
    int TableSize;
    Cell *TheCells;
};

/* Return the smallest prime that is >= N; assumes N >= MinTableSize. */
static int nextprime(int N)
{
    int i;

    if (N % 2 == 0) {
        N++;
    }
    for (;; N += 2) {
        for (i = 3; i * i <= N; i += 2) {
            if (N % i == 0) {
                goto ContOuter; /* Sorry about this! */
            }
        }
        return N;
    ContOuter:;
    }
}

/*
 * Hash function for ints: Key modulo tablesize.
 *
 * In C the remainder takes the sign of the dividend, so a negative Key
 * would yield a negative value that wraps to a huge unsigned Index.
 * The result is therefore folded back into 0 .. tablesize-1.
 * tablesize must be positive.
 */
Index Hash(ElementType Key, int tablesize)
{
    int Slot = Key % tablesize;

    if (Slot < 0) {
        Slot += tablesize;
    }
    return (Index)Slot;
}

/*
 * Create an empty table.
 *
 * The requested size is rounded up to the next prime and every cell is
 * marked Empty. Sizes below MinTableSize are reported through Error();
 * allocation failures are reported through FatalError().
 */
HashTable initializetable(int tablesize)
{
    HashTable H;
    int i;

    if (tablesize < MinTableSize) {
        Error("Table size too small!");
        return NULL;
    }

    /* Allocate table */
    H = malloc(sizeof *H);
    if (H == NULL) {
        FatalError("Out of space!!!");
    }

    H->TableSize = nextprime(tablesize);

    /* Allocate array of cells */
    H->TheCells = malloc(sizeof *H->TheCells * (size_t)H->TableSize);
    if (H->TheCells == NULL) {
        FatalError("Out of space!!!");
    }

    for (i = 0; i < H->TableSize; i++) {
        H->TheCells[i].Info = Empty;
    }

    return H;
}

/*
 * Locate Key by quadratic probing.
 *
 * Returns the cell that holds Key, or the first Empty cell reached on
 * its probe sequence. The i-th probe is i*i cells past the home slot;
 * it is computed incrementally by adding 2*i - 1 to the previous probe.
 *
 * NOTE(review): the loop ends only when it reaches the key or an Empty
 * cell. Callers are expected to keep the table sparse (Rehash before it
 * is more than half full) - confirm the intended load limit.
 */
Position Find(ElementType Key, HashTable H)
{
    Position CurrentPos = Hash(Key, H->TableSize);
    int CollisionNum = 0;

    while (H->TheCells[CurrentPos].Info != Empty &&
           H->TheCells[CurrentPos].Element != Key) { /* Probably need strcmp!! */
        CurrentPos += 2 * ++CollisionNum - 1;
        if (CurrentPos >= (Index)H->TableSize) {
            CurrentPos -= (Index)H->TableSize;
        }
    }
    return CurrentPos;
}

/* Store Key in the cell chosen by Find unless it is already stored. */
void Insert(ElementType Key, HashTable H)
{
    Position Pos = Find(Key, H);

    if (H->TheCells[Pos].Info != Legitimate) {
        /* OK to insert here */
        H->TheCells[Pos].Info = Legitimate;
        H->TheCells[Pos].Element = Key; /* Probably need strcpy! */
    }
}

/*
 * Move every key into a new table of roughly twice the size.
 *
 * The old table - both its cell array and its header - is released, so
 * the handle passed in must not be used afterwards; use the returned
 * handle instead.
 */
HashTable Rehash(HashTable H)
{
    HashTable NewTable;
    int i;

    /* Get a new, empty table */
    NewTable = initializetable(2 * H->TableSize);

    /* Scan through old table, reinserting into new */
    for (i = 0; i < H->TableSize; i++) {
        if (H->TheCells[i].Info == Legitimate) {
            Insert(H->TheCells[i].Element, NewTable);
        }
    }

    /* Free the old header as well as the old cells, otherwise it leaks */
    Destroytable(H);

    return NewTable;
}

/* Return the key stored in cell P of table H. */
ElementType Retrieve(Position P, HashTable H)
{
    return H->TheCells[P].Element;
}

/* Free the cell array and the table. */
void Destroytable(HashTable H)
{
    if (H == NULL) {
        return;
    }
    free(H->TheCells);
    free(H);
}
FileName: main.c
#include "hashquad.h"
#include <stdio.h>
/*
 * Demo for the open addressing table: insert the first few perfect
 * squares and print each key next to Hash(key, TABLE_SIZE).
 *
 * NOTE(review): the table-size argument and the loop bound were lost in
 * the article text. 10 is taken from the surviving "ten" in the Hash
 * call; KEY_COUNT = 10 is an assumption - confirm against the original.
 * The printed slot uses TABLE_SIZE, whereas the table itself rounds its
 * size up to a prime, so the printed value is illustrative only.
 */
int main(void)
{
    enum { TABLE_SIZE = 10, KEY_COUNT = 10 };
    HashTable H = initializetable(TABLE_SIZE);
    int i;

    if (H == NULL) {
        return 1;
    }

    printf("Hash Table: \n");
    for (i = 1; i <= KEY_COUNT; i++) {
        Insert(i * i, H);
        /* Hash() returns an unsigned Index, hence %u */
        printf("%d:%u\n", i * i, Hash(i * i, TABLE_SIZE));
    }

    Destroytable(H);
    return 0;
}
Appendix: The code above uses the Error and FatalError macros, which are implemented as follows (this is the fatal.h file):
#include <stdio.h>
#include <stdlib.h>
/*
 * Error reporting helpers.
 *
 * FatalError prints Str and a newline to stderr, then terminates the
 * program with exit status 1. Error is an alias for FatalError, so it
 * does not return either.
 *
 * There must be no space between the macro name and its parameter
 * list; with a space the macro is object-like and the "(Str)" becomes
 * part of the replacement text.
 */
#define Error(Str)      FatalError(Str)
#define FatalError(Str) (fprintf(stderr, "%s\n", (Str)), exit(1))
Note: This is excerpted from "Data Structures and Algorithm Analysis in C" by Mark Allen Weiss; the code was compiled and tested with GCC.
Attachment download: http://download.csdn.net/detail/shuxiao9058/4212416#hashsep_20120406.tar.gz, http://download.csdn.net/detail/shuxiao9058/4212417#hashquad_20120406.tar.gz