Word Frequency Statistics Update
What the program does: it reads a file path from the console, counts the total number of words and the number of distinct words in that file, prints the frequency of every word, and then sorts the words by frequency.
Header file
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Defining macros
/* Size in bytes of the buffers that hold a single word. */
#define Word_length 250
Defining structures and global variables
struct node{ char word[word_length]; int Time ; struct Node *struct topnode{ int sum; // Number of full- text words int num; // The number of full-text no repeating words Wordnode ** L = NULL;
Declaring the functions used in the file
/* Forward declarations for every function defined below main. */
wordnode *wordsearch(char *word);   /* find or create the entry for word */
void wordjob(char word[]);          /* normalise a token in place */
void wordCount(char *word);         /* add one occurrence of word */
void Printcountlist(void);          /* print totals and all frequencies */
void Printfirsttentimes(void);      /* print the first ten list entries */
void mergesort(wordnode **head);    /* sort a list by descending count */
void Frontbacksplit(wordnode *head, wordnode **pre, wordnode **next);
wordnode *Sortedmerge(wordnode *pre, wordnode *next);
void release(void);                 /* free every list entry */
Main function
intMainintargcChar*argv[]) { CharTemp[word_length];//defines an array for temporarily storing words Charfile_path[ -]; Wordnode*h; FILE*file; printf ("Please enter the file path:"); Gets (File_path); if(File = fopen (File_path,"R")) ==NULL) {printf ("file read failed!"); Exit (1); } L= &T; L->num =0; L->sum =0; L->next =NULL; while(fscanf (file,"%s", temp))! =EOF) {L->sum++; Wordjob (temp); WordCount (temp); } fclose (file); Printcountlist (); printf ("\ n \ nthe output word frequency is the highest 10 words \ n"); H= l->Next; MergeSort (&H);//SortPrintfirsttentimes (); Release (); return 0;}
Find the node that holds the word and return it
Wordnode *wordsearch (Char*word) { Char*T; Wordnode*node; Wordnode*nextnode = l->Next; if(L->next = =NULL) {Node= (wordnode*)malloc(sizeof(Wordnode)); strcpy (Node-Word,word); Node->time =0; Node->next = NULL;//Initial, must have, otherwise there will be errors. l->num++; L->next =node; returnnode; } while(NextNode! = NULL)//Find matching words{T= nextnode->Word; if(strcmp (t,word) = =0) { returnNextNode; } nextnode= nextnode->Next; } if(NextNode = = NULL)//The word does not exist in the original list{node= (wordnode*)malloc(sizeof(Wordnode)); strcpy (Node-Word, word); Node->time =0; Node->next = l->Next; L->next =node; L->num++; returnnode; } Else returnNextNode;//returns the found node}
Word Frequency statistics
void wordCount (char *word) { *tmpnode; = Wordsearch (word); // the node where Word is located tmpnode->time++;}
Output the frequency of every word
voidprintcountlist () {inti =0; Wordnode*node = l->Next; if(L->next = =NULL) {printf ("the file has no content! "); } Else{printf ("\ n This article total%d words \ n Not repeating the word%d \ n",l->sum,l->num); printf ("\ n Output the frequency of all words \ n"); while(Node! =NULL) {printf ("%s:%d times \ t",node->word,node->Time ); I++; Node= node->Next; if(i%4==0) printf ("\ n"); } }}
Output the ten most frequent words
voidPrintfirsttentimes () {Wordnode*node = l->Next; inti =1; if(L->next = =NULL) {printf ("the file has no content!"); } Else { while(Node! = NULL && i<=Ten) {printf ("\t%s:%d times \ n",node->word,node->Time ); Node= node->Next; I++; } }}
Merge sort of the word frequency results
/*
 * Sort the list at *headnode by descending count using merge sort and store
 * the new head back through headnode.  Lists of zero or one entry are left
 * untouched.
 *
 * NOTE(review): some BSD-derived C libraries declare a different
 * mergesort() in <stdlib.h>; confirm there is no clash on the target
 * platform.
 */
void mergesort(wordnode **headnode)
{
    wordnode *pre;
    wordnode *next;
    wordnode *head = *headnode;

    if (head == NULL || head->next == NULL) {
        return;
    }

    Frontbacksplit(head, &pre, &next);   /* cut the list into two halves */
    mergesort(&pre);
    mergesort(&next);
    *headnode = Sortedmerge(pre, next);  /* merge the two sorted halves */
}
Split the list into a front half and a back half
/*
 * Split the list starting at source into two halves.
 *
 * *pre receives the front half (which gets the extra entry when the length
 * is odd) and *next receives the back half.  A list of zero or one entry is
 * returned whole in *pre with *next set to NULL.  The split is done by
 * cutting the link after the middle entry, so source itself is modified.
 */
void Frontbacksplit(wordnode *source, wordnode **pre, wordnode **next)
{
    wordnode *fast;
    wordnode *slow;

    if (source == NULL || source->next == NULL) {
        *pre = source;
        *next = NULL;
        return;
    }

    /* fast advances two entries for each one slow advances, so slow stops
     * on the last entry of the front half. */
    slow = source;
    fast = source->next;
    while (fast != NULL) {
        fast = fast->next;
        if (fast != NULL) {
            slow = slow->next;
            fast = fast->next;
        }
    }

    *pre = source;
    *next = slow->next;
    slow->next = NULL;   /* terminate the front half */
}
Take the node with the highest frequency as the head node
/*
 * Merge two lists that are each sorted by descending count into a single
 * list sorted the same way, and return its head.
 *
 * On equal counts the entry from pre is taken first.  Either argument may
 * be NULL.  The merge is iterative so that stack use does not grow with the
 * length of the lists.
 */
wordnode *Sortedmerge(wordnode *pre, wordnode *next)
{
    wordnode *result = NULL;
    wordnode **tail = &result;   /* link that receives the next entry */

    while (pre != NULL && next != NULL) {
        if (pre->time >= next->time) {
            *tail = pre;
            pre = pre->next;
        } else {
            *tail = next;
            next = next->next;
        }
        tail = &(*tail)->next;
    }

    /* Append whichever list still has entries (or NULL if neither does). */
    *tail = (pre != NULL) ? pre : next;

    return result;
}
Working with words
/*
 * Normalise a token in place: convert 'A'-'Z' to lower case and delete
 * every character that is not an ASCII letter.
 *
 * The result may be an empty string when the token contains no letters.
 * Kept characters are compacted in a single pass, so runs of consecutive
 * non-letters are removed completely.
 */
void wordjob(char word[])
{
    size_t out = 0;   /* next position to write a kept character */
    size_t in;

    for (in = 0; word[in] != '\0'; in++) {
        char c = word[in];

        if (c >= 'A' && c <= 'Z') {
            c = (char)(c - 'A' + 'a');
        }
        if (c >= 'a' && c <= 'z') {
            word[out++] = c;
        }
    }
    word[out] = '\0';
}
Free all node memory.
void release () { *pre; if (L->next = = NULL ) return ; = L->Next; while (Pre! = NULL) { L->next = pre->next; Free (pre); = L->next; }}
Ssh://[email Protected]:amberpass/cptjgx.git
https://git.coding.net/amberpass/cptjgx.git
2nd Word Frequency Statistics update