編程珠璣 15.1 單詞

來源:互聯網
上載者:User

《編程珠璣》第 15.1 節「單詞」中,需要統計《聖經》一書中出現頻率最高的單詞。這是典型的 Top K 問題,可採用經典的「Hash + 堆」方法解決,代碼如下:

/*
 * Programming Pearls, column 15.1 ("words"): report the K most frequent
 * words of a text.  Words are counted in a chained hash table, then a
 * size-K min-heap is used to select the K entries with the largest counts.
 *
 * The input here is a generated file of random integers (see gen_data),
 * which stands in for a real text.
 */
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>   /* malloc/free/exit/qsort/rand; replaces non-standard <malloc.h> */
#include <time.h>     /* the C header; <ctime> exists only in C++ */

#define HASHLEN 101
#define WORDLEN 30
#define MAX  100000
#define DOMAIN 300
#define K 10

/* Two-level stringification so WORDLEN can be spliced into a scanf format. */
#define STR_(x) #x
#define STR(x) STR_(x)

/* File that gen_data() writes and main() reads back. */
#define DATA_FILE "c://data1.txt"

/* One entry of a hash chain: a word, its occurrence count, the next entry. */
typedef struct node {
    char *word;
    int count;
    struct node *next;
} node, *node_ptr;

static node_ptr head[HASHLEN];  /* bucket heads of the hash table */
static node array[K];           /* min-heap holding the current top-K candidates */

/*
 * Hash a NUL-terminated string to a bucket index in [0, HASHLEN).
 *
 * The value is reduced on every step so the result can never equal
 * HASHLEN (the original only reduced when value > HASHLEN, which let
 * exactly HASHLEN through and indexed one past the end of head[]).
 * Bytes are read as unsigned char so non-ASCII input hashes the same
 * way regardless of the platform's char signedness.
 */
int hash_function(const char *p)
{
    unsigned int value = 0;

    while (*p != '\0') {
        value = (value * 31 + (unsigned char)*p++) % HASHLEN;
    }
    return (int)value;
}

/*
 * Record one occurrence of str in the hash table.
 *
 * If the word is already present its count is incremented; otherwise a
 * new node with count 1 is pushed onto the front of its bucket's chain.
 * The word is copied, so the caller may reuse its buffer.
 * Terminates the program if memory cannot be allocated.
 */
void append_word(const char *str)
{
    int index = hash_function(str);

    for (node_ptr p = head[index]; p != NULL; p = p->next) {
        if (strcmp(str, p->word) == 0) {
            p->count++;
            return;
        }
    }

    node_ptr q = malloc(sizeof *q);
    /* strlen, not sizeof: sizeof(str) is only the size of a pointer. */
    char *copy = malloc(strlen(str) + 1);
    if (q == NULL || copy == NULL) {
        perror("malloc");
        free(copy);
        free(q);
        exit(EXIT_FAILURE);
    }
    strcpy(copy, str);

    q->word = copy;
    q->count = 1;
    q->next = head[index];   /* insert at the head of the chain */
    head[index] = q;
}

/*
 * Write MAX random integers in the range [0, DOMAIN) to DATA_FILE,
 * separated by two spaces.  Terminates the program if the file cannot
 * be opened or written.
 */
void gen_data(void)
{
    FILE *fp = fopen(DATA_FILE, "w");
    if (fp == NULL) {
        perror(DATA_FILE);
        exit(EXIT_FAILURE);
    }

    srand((unsigned int)time(NULL));
    for (int i = 0; i < MAX; i++) {
        fprintf(fp, "%d  ", rand() % DOMAIN);
    }

    /* fclose flushes buffered output, so a write error can surface here. */
    if (fclose(fp) != 0) {
        perror(DATA_FILE);
        exit(EXIT_FAILURE);
    }
}

/*
 * Sift the element at `index` down until the min-heap property (ordered
 * by count) holds for the subtree rooted there.
 *
 * The heap is stored 0-based: the children of i are 2i+1 and 2i+2.
 * beginIndex and endIndex are used only to derive the heap length
 * (endIndex - beginIndex + 1); element positions are always taken
 * relative to array[0].
 */
void heapAdjust(node array[], int beginIndex, int endIndex, int index)
{
    int length = endIndex - beginIndex + 1;

    for (;;) {
        int smallest = index;
        int left = 2 * index + 1;
        int right = left + 1;

        if (left < length && array[left].count < array[smallest].count) {
            smallest = left;
        }
        if (right < length && array[right].count < array[smallest].count) {
            smallest = right;
        }
        if (smallest == index) {
            return;
        }

        node tmp = array[smallest];
        array[smallest] = array[index];
        array[index] = tmp;
        index = smallest;
    }
}

/* Turn the first len elements of array into a min-heap ordered by count. */
void heapBuild(node array[], int len)
{
    /* Start at the last internal node; leaves are already valid heaps. */
    for (int i = len / 2 - 1; i >= 0; i--) {
        heapAdjust(array, 0, len - 1, i);
    }
}

/* qsort comparator: orders nodes by count, largest first. */
static int by_count_desc(const void *a, const void *b)
{
    const node *x = a;
    const node *y = b;

    return (x->count < y->count) - (x->count > y->count);
}

/* Release every node and word string held by the hash table. */
static void free_table(void)
{
    for (int i = 0; i < HASHLEN; i++) {
        node_ptr p = head[i];
        while (p != NULL) {
            node_ptr next = p->next;
            free(p->word);
            free(p);
            p = next;
        }
        head[i] = NULL;
    }
}

/*
 * Generate the data file, count every whitespace-separated word in it,
 * and print the (at most) K most frequent words with their counts in
 * descending order of count.
 */
int main(void)
{
    gen_data();

    /* One extra byte for the terminator; the scanf width below is WORDLEN. */
    char str[WORDLEN + 1];
    int cnt1 = 0;
    int i;

    for (i = 0; i < HASHLEN; i++) {
        head[i] = NULL;
    }

    FILE *fp_passage = fopen(DATA_FILE, "r");
    if (fp_passage == NULL) {
        perror(DATA_FILE);
        return EXIT_FAILURE;
    }

    /*
     * The field width bounds the write into str; a token longer than
     * WORDLEN characters is read as several consecutive words.
     */
    while (fscanf(fp_passage, "%" STR(WORDLEN) "s", str) == 1) {
        cnt1++;
        append_word(str);
    }
    printf("the cnt1 is %d\n", cnt1);
    fclose(fp_passage);

    /*
     * Select the top K: walk EVERY node of every chain (not just the
     * bucket heads).  The first K nodes seed the heap; afterwards a node
     * replaces the heap minimum only if its count is larger.
     */
    int filled = 0;
    for (i = 0; i < HASHLEN; i++) {
        for (node_ptr p = head[i]; p != NULL; p = p->next) {
            if (filled < K) {
                array[filled++] = *p;
                if (filled == K) {
                    heapBuild(array, K);
                }
            } else if (array[0].count < p->count) {
                array[0] = *p;
                heapAdjust(array, 0, K - 1, 0);
            }
        }
    }

    /* Heap order is not sorted order; sort the survivors for display. */
    qsort(array, (size_t)filled, sizeof array[0], by_count_desc);

    printf("the top %d is as follows\n", K);
    /* Fewer than K distinct words may exist; print only what was found. */
    for (i = 0; i < filled; i++) {
        printf("%s , and its count is %d\n", array[i].word, array[i].count);
    }

    /* array[] shares word pointers with the table, so free only after printing. */
    free_table();
    return 0;
}

代碼中核心代碼有:

1,雜湊(散列)函數

2,調整堆函數

關鍵資料結構:

適用於雜湊鏈表的節點結構體,包含字串(單詞)、出現次數,以及指向下一個節點的指標

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.