Word Frequency statistics

Source: Internet
Author: User

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX 27 // 26 letters plus the apostrophe

// Node of the dictionary tree (trie).
//
// next[0..25] are the children for the letters 'a'..'z' and next[26] is the
// child for the apostrophe; a NULL entry means there is no child for that
// character.  num counts how many times the word ending at this node has
// been inserted (0 when no word ends here).
typedef struct Word
{
    struct Word *next[MAX];
    int num;
} Word;
// One output record: a word and its frequency.
struct tlist
{
    char word[200]; // NUL-terminated text of the word
    int time;       // number of occurrences of the word
};
// Table of (word, frequency) records; Readword appends to it and main sorts
// and prints it.
struct tlist list[3000000];
// Root node of the dictionary tree; allocated in main.
Word *root;
// Scratch buffer in which Readword spells out the word for the current path.
char str[200] = "";
// Buffer in which main accumulates the characters of the word being read.
char tempword[1000];
// Number of entries of list that are currently in use.
int size = 0;
Functions for new words
void Createword (char *str)
{
int len = strlen (str), ID;
Word *p = root, *q;
for (int i = 0; i < len; i + +)//traverse the word to determine whether the current character is a letter or
{if (Str[i] >= ' A ' && str[i] <= ' z ')
id = str[i]-' a ';
if (Str[i] >= ' A ' && str[i] <= ' Z ')
id = str[i]-' A ';
if (str[i] = = ' \ ')
id = 26;
if (p->next[id] = = NULL)//If you have reached the end of the list, open a new structure to deposit letters

{

Q = (Word *) malloc (sizeof (word));

for (int j = 0; J < MAX; J + +)

{Q->num=0;q->next[j] = NULL;}

P->next[id] = q;
p = p->next[id];

}

else//If the end of the list is not reached, the pointer points to the next

{

p = p->next[id];

}
}
p->num++;

}
The function of reading a word
void Readword (Word *p,int len)

{
int i;

for (i=0;i<27;i++)

{

if (p->next[i]!=null)

{

if (i==26)
{str[len+1]= '; str[len]= '; len++;}
Else
{
Str[len]= ' a ' +i;
len++;
}
Readword ((word*) p->next[i],len);
len--;
}

}
if (p->num!=0)

{
str[len]= ' + ';

strcpy (LIST[SIZE].WORD,STR);

If a word end flag is encountered, the STR is deposited

List[size].word;

list[size].time=p->num;

size++;

}

}
Main program
int main ()

{

root= (word*) malloc (sizeof (Word));

int i,j;

for (i=0;i<27;i++) root->next[i]=null;

FILE *FP;

char x;

int len=0;

Fp=fopen ("Text.txt", "R");

while ((X=FGETC (FP))!=eof)//read in TXT document

{

if ((x>= ' A ' &&x<= ' Z ') | | (x>= ' A ' &&x<= ' z ') | | (x== ' \ ' &&len!=0))

{tempword[len]=x;len++;}

else {tempword[len]= ' + ';
Createword (Tempword);

len=0;

}

}

tempword[len]= ' + ';

Createword (Tempword);

len=0;

Fclose (FP);

Readword (root,0);

Compare frequency size bubble sort

struct Tlist temp;

for (i=0;i<size-1;i++)

for (j=i+1;j<size;j++)

if (list[i].time>list[j].time| | (LIST[I].TIME==LIST[J].TIME&AMP;&AMP;STRCMP (List[i].word,list[j].word) >0))

{
Temp.time=list[i].time;

List[i].time=list[j].time;


List[j].time=temp.time;


strcpy (Temp.word,list[i].word);


strcpy (List[i].word,list[j].word);


strcpy (List[j].word,temp.word);

}
Strong words and corresponding frequencies are output to Result.txt
FILE*FPX;
Fpx=fopen ("Result.txt", "w");
for (i=0;i<size;i++)
fprintf (FPX, "%s%d\n", list[i].word,list [I].time);
Fclose (FPX);
return 0;
}

Word Frequency statistics

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months of usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 technical support, 6 free tickets per quarter, faster response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.