/* Search for popular queries: a search engine records every query string that users submit in its log files. Each query string is 1-255 bytes long. Suppose there are currently 10 million records and the query strings are highly repetitive: although there are 10 million records in total, there are no more than 3 million distinct query strings once duplicates are removed. The more often a query string is repeated, the more users have searched for it, i.e. the more popular it is. Find the 10 most popular query strings using no more than 1 GB of memory. (1) Describe your approach to solving this problem; (2) give the main processing steps, the algorithm, and its complexity.
Author: Sankt.
My program takes a simple approach: it reads a log file in which each line is one search string.
*/
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace std;

// List of (query string, occurrence count) entries; this is the container
// that gets sorted by count.
typedef std::vector<std::pair<std::string, int> > newvector;

// A single (query string, occurrence count) entry.
typedef std::pair<std::string, int> newpair;
// Ordering predicate for (query string, count) pairs: an entry with a larger
// count sorts before an entry with a smaller one. Entries with equal counts
// compare equivalent, so their relative order after std::sort is unspecified.
struct intcmp
{
    // The pair type is spelled out in full (it is the same type as the
    // `newpair` alias) so the comparator does not depend on that alias.
    bool operator()(const std::pair<std::string, int>& v1,
                    const std::pair<std::string, int>& v2) const
    {
        return v1.second > v2.second;
    }
};
Int main ()
{
Map <string, int> mapstr;
Ifstream fin ("log.txt ");
If (fin = NULL)
{
Cerr <"the file was not opened." <Endl;
Exit (1 );
}
String strtemp;
Map <string, int >:: iterator ite;
While (Getline (FIN, strtemp ))
{
ITE = mapstr. Find (strtemp );
If (ITE! = Mapstr. End ())
{
Mapstr [strtemp] ++;
}
Else
{
Mapstr. insert (make_pair (strtemp, 1 ));
}
}
Newvector VEC;
For (ITE = mapstr. Begin (); ite! = Mapstr. End (); ++ ITE)
{
Cout <(* ITE). First <"" <(* ITE). Second <Endl;
Pair <string, int> PA (* ITE). First, (* ITE). Second );
VEC. push_back (PA );
}
Cout <Endl;
Cout <"============================ after sort ====================== ==== "<Endl;
Cout <Endl;
Sort (VEC. Begin (), VEC. End (), intcmp ());
Newvector: iterator vecite;
For (vecite = Vec. Begin (); vecite! = Vec. End (); ++ vecite)
{
Cout <(* vecite). First <"" <(* vecite). Second <Endl;
}
System ("pause ");
Return 0;
}