"""Word-frequency statistics in Python using the jieba library.

Counts how many times each character name occurs in the "Three Kingdoms"
text and prints the five most frequent names.
"""
from collections import Counter

# Frequent tokens that are not character names; they are dropped from the
# tally before ranking.
# NOTE(review): these literals (and the alias names below) look
# machine-translated. They only take effect if they match the tokens that
# jieba actually produces for the input text -- confirm against the data.
excludes = {
    'General', 'but said', 'Two people', "can't", 'so', 'Jingzhou', 'not be',
    'deliberations', 'How to', 'Sergeant', 'around', 'My Lord',
    'Lead Soldiers', 'Next day', 'exultation', 'Military Horse',
    'World', 'Dong Wu',
}

# Alternative spellings / titles mapped to the name they are counted under.
ALIASES = {
    'Ming Yue': 'Zhuge Liang',
    'Hung Ming': 'Zhuge Liang',
    'Kuan Kung': 'Guan Yu',
    'Cloud Length': 'Guan Yu',
    'Hyun Tak': 'Liu Bei',
    'Xuan de Yue': 'Liu Bei',
    'Mengde': 'Caocao',
    'Prime Minister': 'Caocao',
}


def count_names(words):
    """Tally the tokens in *words*, merging aliases and dropping excludes.

    Args:
        words: Iterable of string tokens (e.g. the list from ``jieba.lcut``).

    Returns:
        A ``Counter`` mapping each remaining token to its occurrence count.
        Tokens of length 1 are skipped, tokens listed in ``ALIASES`` are
        counted under their mapped name, and every entry of ``excludes`` is
        removed from the result.
    """
    counts = Counter(
        ALIASES.get(word, word) for word in words if len(word) != 1
    )
    for word in excludes:
        # pop() with a default: an excluded word may never occur in the text,
        # in which case ``del counts[word]`` would raise KeyError.
        counts.pop(word, None)
    return counts


def format_top(counts, limit=5):
    """Return one formatted line per entry for the *limit* largest counts.

    Args:
        counts: ``Counter`` as returned by :func:`count_names`.
        limit: Maximum number of entries to report.

    Returns:
        A list of strings, highest count first; entries with equal counts
        keep the order in which they were first seen. The list is shorter
        than *limit* when fewer distinct tokens are available, rather than
        failing with an IndexError.
    """
    return [
        '{0:<10}{1:>5}'.format(name, count)
        for name, count in counts.most_common(limit)
    ]


def main():
    """Read the text file, segment it with jieba and print the top names."""
    # Imported here so the counting helpers stay importable on machines
    # where the third-party jieba package is not installed.
    import jieba

    # Context manager so the file handle is closed after reading.
    with open('Threekingdoms.txt', 'r', encoding='Utf-8') as source:
        text = source.read()

    # lcut returns the segmentation result as a list.
    words = jieba.lcut(text)

    for line in format_top(count_names(words)):
        print(line)


if __name__ == '__main__':
    main()