"""Count word frequencies in a text file and plot the most common words.

The text-processing helpers (``process_line``, ``process_file``,
``most_word``) depend only on the standard library; matplotlib is needed
solely by ``showtable``.
"""
import os
import string

# Characters removed from both ends of every token before it is counted.
_STRIP_CHARS = string.punctuation + string.whitespace

# Defaults kept from the original script (Windows-specific locations).
DEFAULT_FONT_PATH = r'C:\Windows\Fonts\simkai.ttf'
DEFAULT_OUTPUT_PATH = r'D:\word.png'


def process_line(line, hist):
    """Add the words of one line of text to a frequency dictionary.

    Each whitespace-separated token has leading/trailing punctuation and
    whitespace stripped and is lower-cased before being counted.

    Args:
        line: A string of text.
        hist: Dict mapping word -> count; updated in place.
    """
    for token in line.split():
        word = token.strip(_STRIP_CHARS).lower()
        if not word:
            # The token was pure punctuation (e.g. "--"); nothing to count.
            continue
        hist[word] = hist.get(word, 0) + 1


def process_file(filename, encoding=None):
    """Build a word-frequency dictionary from a text file.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        Dict mapping each lower-cased word to its number of occurrences.
    """
    res = {}
    with open(filename, 'r', encoding=encoding) as f:
        for line in f:
            process_line(line, res)
    return res


def most_word(hist, num):
    """Return the ``num`` most frequent words.

    Args:
        hist: Dict mapping word -> count.
        num: Maximum number of entries to return.

    Returns:
        A list of ``[count, word]`` lists sorted in descending order
        (ties are broken by word, also descending).
    """
    # Count comes first in each pair so that sorting orders by frequency.
    pairs = sorted(([count, word] for word, count in hist.items()),
                   reverse=True)
    return pairs[:num]


def showtable(data, font_path=DEFAULT_FONT_PATH,
              output_path=DEFAULT_OUTPUT_PATH):
    """Draw a bar chart of word frequencies, save it and display it.

    Args:
        data: Sequence of ``[count, word]`` pairs as returned by
            ``most_word``.
        font_path: Path of a TTF font used for the labels; if the file does
            not exist, matplotlib's default font is used instead.
        output_path: Where the PNG is written; a falsy value skips saving.
    """
    # Imported here so the counting helpers work without matplotlib.
    from matplotlib import pyplot as plt
    import matplotlib.font_manager as fm

    if os.path.isfile(font_path):
        font = fm.FontProperties(fname=font_path)
    else:
        font = fm.FontProperties()

    # One bar() call per word, so each bar gets its own colour and its own
    # legend entry.
    for count, word in data:
        plt.bar([word], [count], label=word)
    if data:
        plt.legend(prop=font)

    plt.xlabel('Word', fontproperties=font)
    plt.ylabel('Frequency', fontproperties=font)
    plt.title('count the frequency of words appearing', fontproperties=font)

    # 30.5 x 18.5 inches at 100 dpi gives a 3050 x 1850 pixel image.
    figure = plt.gcf()
    figure.set_size_inches(30.5, 18.5)
    if output_path:
        figure.savefig(output_path, dpi=100)
    plt.show()


def main():
    """Count the words of English.txt and chart the 30 most frequent."""
    hist = process_file("English.txt")
    data = most_word(hist, 30)
    print(data)
    showtable(data)


if __name__ == '__main__':
    main()
# Python: count how often each word occurs in a text file and display the result as a bar chart