This example uses Go (Golang) to compute word-frequency statistics. Steps:
(1) Read an article from a file.
(2) Count how often each word occurs and sort the words by number of occurrences, from most to least frequent.
(3) Write the result to a file.
Note: Any character that is not an English letter is treated as a word delimiter (that is, it is equivalent to a space).
Efficiency: Counting an article of about 1.5 million words (150W, i.e. 150 × 10,000) with this program takes about 70 ms.
1. Core code:
Package Wordtestimport ("bytes" "FMT" "Io/ioutil" "OS" "Runtime" "Sort" "strings" "Time")//simple Word Frequency Statistics task Func counttestbase ( Inputfilepath string, Outputfilepath string) {//Time starting point start: = times. Now (). Unixnano ()/1e6//Read file Filedata, err: = Ioutil. ReadFile (Inputfilepath) checkerror (err, "read file") var fileText string = string (fileData)// According to the number of CPU cores the new open newrountinecount: = runtime. NUMCPU () *2-1runtime. Gomaxprocs (Newrountinecount + 1)//shard file Parts: = Splitfiletext (FileText, Newrountinecount) var ch chan map[string]int = Make (Chan Map[string]int, Newrountinecount) for I: = 0; i < Newrountinecount; i++ {go counttest (parts[i], ch)}//main thread receive data var totalwordsmap map[string]int = make (map[string]int, 0) Completecount: = 0for {receivedata: = <-chfor k, V: = Range Receivedata {totalwordsmap[strings. ToLower (k)] + = v}completecount++if Newrountinecount = = Completecount {break}}//add into slice and sort list: = Make ( Wordcountbeanlist, 0) for k, V: = range Totalwordsmap {list = append (list, Newwordcountbean (k, v))}sort. Sort (list)//Time EndPoint end: = time. Now (). Unixnano ()/1e6fmt. Printf ("Time consume:%dms\n", end-start)//output Wordscount: = List.totalcount () var data bytes. Bufferdata.writestring (FMT. Sprintf ("Program execution:%dms\n", End-start)) data. WriteString (FMT. Sprintf ("Total number of words in the article:%d\n\n", Wordscount)) for _, V: = Range list {var percent float64 = 100.0 * Float64 (V.count)/float64 (Word Scount) _, Err: = data. WriteString (FMT. Sprintf ("%s:%d,%3.2f%%\n", V.word, V.count, percent)) checkerror (Err, "bytes. Buffer, writestring ")}err = Ioutil. WriteFile (Outputfilepath, []byte (data. String ()), OS. Modeperm) CheckError (Err, "ioutil. 
WriteFile ")}func counttest (text string, ch Chan map[string]int) {var wordmap map[string]int = make (map[string]int, 0)//by Word Female read, all characters except 26 letters (case) nonspacing considered to be delimiter startindex: = 0letterStart: = FalseFor I, V: = Range Text {if (v >= && v <= 90) | | (v >= && v <= 122) {if!letterstart {Letterstart = Truestartindex = i}} else {if Letterstart {Wordmap[text[startindex:i]]++letterstart = Fals e}}}//Last Word if LetterstArt {wordmap[text[startindex:]]++}ch <-wordmap}//divides the full text into N-segment func splitfiletext (FileText string, n int) []string {length : = Len (fileText) Parts: = Make ([]string, n) lastpostion: = 0for I: = 0; i < n-1; i++ {Position: = length/n * (i + 1) for string (Filetext[position])! = "" {Position++}parts[i] = Filetext[lastpostion:pos Ition]lastpostion = position}//last paragraph parts[n-1] = Filetext[lastpostion:]return parts}func CheckError (err error, MSG String) {if err! = Nil {Panic (msg + "," + Err. Error ())}}
2. The struct definition:
package wordtest

// wordCountBean pairs a word with the number of times it occurred.
type wordCountBean struct {
	word  string
	count int
}

// newWordCountBean returns a pointer to a new wordCountBean holding word and
// count.
func newWordCountBean(word string, count int) *wordCountBean {
	return &wordCountBean{word: word, count: count}
}

// wordCountBeanList is a sortable list of word counts. It implements
// sort.Interface: entries are ordered by descending count, and entries with
// equal counts are ordered by ascending word.
type wordCountBeanList []*wordCountBean

// Len returns the number of entries in the list.
func (list wordCountBeanList) Len() int {
	return len(list)
}

// Less reports whether entry i must sort before entry j: a higher count comes
// first, and ties are broken by the lexicographically smaller word.
func (list wordCountBeanList) Less(i, j int) bool {
	if list[i].count != list[j].count {
		return list[i].count > list[j].count
	}
	return list[i].word < list[j].word
}

// Swap exchanges entries i and j.
func (list wordCountBeanList) Swap(i, j int) {
	list[i], list[j] = list[j], list[i]
}

// totalCount returns the sum of the counts of all entries in the list.
func (list wordCountBeanList) totalCount() int {
	totalCount := 0
	for _, v := range list {
		totalCount += v.count
	}
	return totalCount
}
3. Main function:
Package Mainimport ("Wordstest/wordtest") func main () {inputfilepath: = "Files/article.txt" Outputfilepath: = "files/ Hanjun-result.txt "Wordtest. Counttestbase (Inputfilepath, Outputfilepath)}
Word frequency statistics implemented in Golang