from collections import Counter

import jieba
# Location of the novel's text. The file must already be saved as UTF-8.
TEXT_PATH = "C:/users/eternal/desktop/threekingdoms.txt"

# Frequent tokens that are not character names and must be left out of the
# ranking.
# NOTE(review): these literals (and the ones in ALIASES) look like they were
# machine-translated; jieba presumably emits tokens in the language of the
# input text, so confirm they match the tokens actually produced.
excludes = {"General", "but said", "Jingzhou", "two", "not", "can't", "so"}

# Alternative names / titles, each folded into one canonical name so that a
# character is counted once regardless of how the text refers to them.
ALIASES = {
    "Zhuge Liang": "Hung Ming",
    "Hung Ming Yue": "Hung Ming",
    "Guan Gong": "Guan Yu",
    "cloud length": "Guan Yu",
    "xuan de": "Liu Bei",
    "Xuan de Yue": "Liu Bei",
    "Mengde": "Caocao",
    "Prime Minister": "Caocao",
}

# Number of rows printed in the final ranking table.
TOP_N = 10


def count_words(words, aliases=None, excluded=None):
    """Count token frequencies, merging aliases and dropping excluded words.

    Args:
        words: Iterable of string tokens.
        aliases: Mapping from alternative token to canonical token.
            Defaults to the module-level ``ALIASES``.
        excluded: Iterable of tokens to remove from the result.
            Defaults to the module-level ``excludes``.

    Returns:
        A ``Counter`` mapping each remaining token to its frequency.
    """
    if aliases is None:
        aliases = ALIASES
    if excluded is None:
        excluded = excludes

    counts = Counter()
    for word in words:
        # Single-character tokens are skipped entirely.
        if len(word) == 1:
            continue
        # A table lookup replaces the if/elif chain; one branch of the chain
        # compared (`==`) instead of assigning, so that alias was never
        # applied.
        counts[aliases.get(word, word)] += 1

    # Exclusions are applied after alias merging. ``pop`` with a default
    # tolerates excluded words that never occur in the text, where ``del``
    # would raise KeyError.
    for word in excluded:
        counts.pop(word, None)
    return counts


def rank_counts(counts):
    """Return ``(word, count)`` pairs sorted by descending count.

    The sort is stable, so words with equal counts keep the order in which
    they appear in ``counts``.
    """
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


def main():
    """Read the text, segment it with jieba and print the most frequent words."""
    # The context manager closes the file even if reading fails.
    with open(TEXT_PATH, "r", encoding="utf-8") as source:
        txt = source.read()

    words = jieba.lcut(txt)
    print(words)

    items = rank_counts(count_words(words))
    print(items)

    # Slicing avoids an IndexError when fewer than TOP_N words remain.
    for word, count in items[:TOP_N]:
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()
# Python: "Three Kingdoms" word-frequency statistics