The idea of the algorithm is:
- Read the file from beginning to end, one word at a time.
- Insert each word into the hash_map and increment its occurrence count.
- Traverse the hash_map and push each word, together with its number of occurrences, into a priority queue ordered by that number.
- Whenever the number of elements in the priority queue exceeds K, remove the element with the lowest count, so that the queue never holds more than K elements.
- After the hash_map has been fully traversed, the K most frequently occurring words are the ones left in the queue.
The concrete implementation and results are as follows:
//K words with the most occurrences. Cpp:defines the entry point for the console application.#include"stdafx.h"#include#include<string>#include<fstream>#include<queue>#include<iostream>#include<algorithm>#include<boost/timer.hpp>using namespacestd;using namespaceboost;voidTop_k_words ()//most occurrences are a word.{timer T; Ifstream fin; Fin.open ("Modern C.txt"); if(!Fin) {cout<<"can not open file"<<Endl; } strings; Hash_map<string,int>CountWords; while(true) {Fin>>s; Countwords[s]++; if(fin.eof ()) { Break; }} cout<<"Total number of words (Repeat count):"<<countwords.size () <<Endl; Priority_queue<pair<int,string>,vector<pair<int,string>>,greater<pair<int,string>>>Countmax; for(hash_map<string,int>::const_iterator i=Countwords.begin (); I!=countwords.end (); i++) {Countmax.push (Make_pair (i->second,i->First )); if(Countmax.size () >Ten) {countmax.pop (); } } while(!Countmax.empty ()) {cout<<countmax.top () .second<<" "<<countmax.top () .first<<Endl; Countmax.pop (); } cout<<"Time Elapsed"<<t.elapsed () <<Endl;}intMainintargcChar*argv[]) {top_k_words (); System ("Pause"); return 0;}
hash_map is not available on Linux, so the following version uses a map to count the words instead:
// K words with the most occurrences.cpp : defines the entry point for the console application.
//
// Portable variant: words are counted in a std::map.

#include <map>
#include <string>
#include <fstream>
#include <queue>
#include <iostream>
#include <algorithm>
#include <functional>
#include <utility>
#include <vector>

using namespace std;

// Prints the 10 most frequent whitespace-separated words of "Modern C.txt".
//
// Output: the number of distinct words, then one "<word> <count>" line per
// kept word in ascending order of count (the min-heap is drained smallest
// first). If the file cannot be opened, only an error message is printed.
void top_k_words()
{
    const size_t k = 10; // how many of the most frequent words to keep

    ifstream fin;
    fin.open("Modern C.txt");
    if (!fin)
    {
        cout << "can not open file" << endl;
        // Bail out: extracting from a stream that failed to open never sets
        // eof(), so an eof()-terminated read loop would spin forever.
        return;
    }

    // Count the occurrences of every word. Using the extraction itself as the
    // loop condition stops on any failure and avoids bumping a counter once
    // more after the final, unsuccessful read at end of file.
    string s;
    map<string, int> countwords;
    while (fin >> s)
    {
        ++countwords[s];
    }

    // size() is the number of distinct words: repeated words are counted once.
    cout << "Total number of words (Repeat count):" << countwords.size() << '\n';

    // Min-heap of (count, word) pairs: top() is always the least frequent
    // entry currently kept (ties are ordered by the word itself), so popping
    // whenever the heap grows past k leaves the k most frequent words.
    priority_queue<pair<int, string>, vector<pair<int, string> >, greater<pair<int, string> > > countmax;
    for (map<string, int>::const_iterator it = countwords.begin(); it != countwords.end(); ++it)
    {
        countmax.push(make_pair(it->second, it->first));
        if (countmax.size() > k)
        {
            countmax.pop();
        }
    }

    while (!countmax.empty())
    {
        cout << countmax.top().second << " " << countmax.top().first << '\n';
        countmax.pop();
    }
}

int main(int argc, char* argv[])
{
    top_k_words();
    return 0;
}
Count the number of occurrences of each word in the article