Java example: counting word (term) frequency in a string
The code is as follows:
package com.gpdi.action;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Public class WordsStatistics {
Class Obj {
Int count;
Obj (int count ){
This. count = count;
}
}
Public List <WordCount> statistics (String word ){
List <WordCount> rs = new ArrayList <WordCount> ();
Map <String, Obj> map = new HashMap <String, Obj> ();
If (word = null ){
Return null;
}
Word = word. toLowerCase ();
Word = word. replaceAll ("'s ","");
Word = word. replaceAll (",","");
Word = word. replaceAll ("-","");
Word = word. replaceAll ("\\.","");
Word = word. replaceAll ("'","");
Word = word. replaceAll (":","");
Word = word. replaceAll ("! ","");
Word = word. replaceAll ("\ n ","");
String [] wordArray = word. split ("");
For (String simpleWord: wordArray ){
SimpleWord = simpleWord. trim ();
If (simpleWord! = Null &&! SimpleWord. inclusignorecase ("")){
Obj cnt = map. get (simpleWord );
If (cnt! = Null ){
Cnt. count ++;
} Else {
Map. put (simpleWord, new Obj (1 ));
}
}
}
For (String key: map. keySet ()){
WordCount wd = new WordCount (key, map. get (key). count );
Rs. add (wd );
}
Collections. sort (rs, new java. util. Comparator <WordCount> (){
@ Override
Public int compare (WordCount o1, WordCount o2 ){
Int result = 0;
If (o1.getCount ()> o2.getCount ()){
Result =-1;
} Else if (o1.getCount () <o2.getCount ()){
Result = 1;
} Else {
Int strRs = o1.getWord (). compareToIgnoreCase (o2.getWord ());
If (strRs> 0 ){
Result = 1;
} Else {
Result =-1;
}
}
Return result;
}
});
Return rs;
}
Public static void main (String args []) {
String word = "Pinterest is might be aa AB marketer's dream-ths site is largely used to curate products ";
WordsStatistics s = new WordsStatistics ();
List <WordCount> rs = s. statistics (word );
For (WordCount word1: rs ){
System. out. println (word1.getWord () + "*" + word1.getCount ());
}
}
}