In this programming exercise, we need to find the most frequently used words in the Bible. This is a typical top-K problem, and it is solved here with the classic hash table + heap method. The code is as follows:
/*
 * Top-K word frequency: words are counted in a chained hash table and the
 * K most frequent ones are meant to be selected with a K-element min-heap.
 */
#include <stdio.h>
#include <string.h>
#include <malloc.h>   /* non-standard; kept from the original listing (malloc itself comes from <stdlib.h>) */
#include <assert.h>
#include <stdlib.h>
#include <time.h>     /* the listing had the C++ header <ctime>; <time.h> works in both C and C++ */

#define HASHLEN 101     /* number of buckets in the hash table */
#define WORDLEN 30      /* size of the buffer main() declares for one word */
#define MAX     100000  /* how many random integers gen_data() writes */
#define DOMAIN  300     /* generated integers lie in [0, DOMAIN - 1] */
#define K       10      /* size of the top-K heap array */

/* Node of a hash bucket's singly linked list. */
typedef struct node {
    char *word;          /* heap-allocated copy of the word */
    int count;           /* number of times the word has been appended */
    struct node *next;   /* next node in the same bucket, or NULL */
} node, *node_ptr;

static node_ptr head[HASHLEN];  /* bucket heads; zero-initialized (all NULL) */
static node array[K];           /* storage for the top-K heap */

/*
 * Hash function: polynomial hash with multiplier 31, reduced modulo HASHLEN.
 *
 * Returns a bucket index in [0, HASHLEN - 1].
 *
 * The listing reduced only when value > HASHLEN, so value == HASHLEN
 * (e.g. for the word "e", code 101) was returned unreduced and indexed one
 * past the end of head[]. Reducing unconditionally gives the same bucket for
 * every other input.
 */
int hash_function(const char *p)
{
    unsigned int value = 0;

    while (*p != '\0') {
        /* unsigned char: keep the result independent of char signedness */
        value = (value * 31 + (unsigned char)*p++) % HASHLEN;
    }
    return (int)value;
}

/*
 * Add a word to the hash table.
 *
 * If the word is already present in its bucket its count is incremented;
 * otherwise a new node with count 1 is inserted at the head of the bucket.
 * The table stores its own copy of str. Terminates the program if memory
 * cannot be allocated.
 */
void append_word(const char *str)
{
    int index = hash_function(str);
    node_ptr p;
    node_ptr q;
    size_t len;

    for (p = head[index]; p != NULL; p = p->next) {
        if (strcmp(str, p->word) == 0) {
            p->count++;
            return;
        }
    }

    /* Create a node. The casts keep the listing valid as C++ as well. */
    q = (node_ptr)malloc(sizeof *q);
    if (q == NULL) {
        fprintf(stderr, "append_word: out of memory\n");
        exit(EXIT_FAILURE);
    }

    /* strlen, not sizeof: sizeof(str) is the size of a pointer, which
       overflowed the buffer for any word longer than that. */
    len = strlen(str) + 1;
    q->word = (char *)malloc(len);
    if (q->word == NULL) {
        free(q);
        fprintf(stderr, "append_word: out of memory\n");
        exit(EXIT_FAILURE);
    }
    memcpy(q->word, str, len);
    q->count = 1;

    /* Insert at the head of the bucket's list. */
    q->next = head[index];
    head[index] = q;
}

/*
 * Generate MAX random integers in the range [0, DOMAIN - 1] and write them
 * to C://data1.txt. Terminates the program if the file cannot be written.
 */
void gen_data(void)
{
    int i;
    /* "w", not "W": an upper-case mode string is not a valid fopen mode. */
    FILE *fp = fopen("C://data1.txt", "w");

    /* Explicit check instead of assert(), which vanishes under NDEBUG. */
    if (fp == NULL) {
        perror("C://data1.txt");
        exit(EXIT_FAILURE);
    }

    srand((unsigned int)time(NULL));
    for (i = 0; i < MAX; i++) {
        /* The space separates the numbers so they can be read back as
           individual tokens. */
        fprintf(fp, "%d ", rand() % DOMAIN);
    }

    if (fclose(fp) != 0) {
        perror("C://data1.txt");
        exit(EXIT_FAILURE);
    }
}

/*
 * Heap adjustment: sift array[index] down so that the subtree rooted at
 * index satisfies the min-heap property on count.
 *
 * The heap length is endindex - beginindex + 1. Child positions are computed
 * as 2 * index + 1 and 2 * index + 2 (indices start from 0) without adding
 * beginindex, so the heap is expected to start at array[0].
 *
 * A child whose count is equal to the parent's is also swapped down; this is
 * the comparison used by the original listing and is kept as is.
 */
void heapadjust(node array[], int beginindex, int endindex, int index)
{
    int length = endindex - beginindex + 1;

    for (;;) {
        int smallest = index;
        int left = 2 * index + 1;
        int right = 2 * index + 2;
        node temp;

        if (left <= length - 1 && array[left].count <= array[smallest].count) {
            smallest = left;
        }
        if (right <= length - 1 && array[right].count <= array[smallest].count) {
            smallest = right;
        }
        if (smallest == index) {
            break;
        }

        temp = array[smallest];
        array[smallest] = array[index];
        array[index] = temp;

        /* Continue with the subtree the old root moved into. */
        index = smallest;
    }
}

/*
 * Create heap: turn array[0 .. len - 1] into a min-heap on count by sifting
 * down every internal node, starting from the last one.
 */
void heapbuild(node array[], int len)
{
    int i;

    for (i = len / 2 - 1; i >= 0; i--) {
        heapadjust(array, 0, len - 1, i);
    }
}

/*
 * main() -- the listing breaks off at this point in the source text; only
 * the opening of main() survives. The fragment is preserved below (with
 * identifier casing restored) rather than guessed at:
 *
 *     int main()
 *     {
 *         gen_data();
 *         char str[WORDLEN];
 *         int i;
 *         int cnt1 = 0;
 *         // init
 *         for (i = 0; i
 */
The core parts of the code are:
1. The hash function.
2. The heap-adjustment function.
Key data structure:
A node struct suited to the hash table's linked lists, containing the word string, its occurrence count, and a pointer to the next node.