# NLTK Book, Chapter 1: searching and exploring text.
# (Reconstructed from a mangled paste; identifiers restored to the
# canonical casing used by nltk.book.)
from __future__ import division

import nltk
nltk.download()  # opens the interactive downloader to fetch the book corpora
from nltk.book import *

import matplotlib

# Show every occurrence of a word together with its surrounding context.
text1.concordance("monstrous")

# Words that appear in contexts similar to the given word.
text1.similar("monstrous")

# Contexts shared by two or more words.
text2.common_contexts(["monstrous", "very"])

# Dispersion plot: positional distribution of each word through the text,
# measured by how many tokens precede it.
text4.dispersion_plot(["citizens", "democracy", "freedom", "duties", "America"])

# Generate random text in the style of text3.
text3.generate()

# Total number of tokens in the text.
len(text3)

# Vocabulary: each distinct token appears once in the set.
sorted(set(text3))
len(set(text3))

# Lexical diversity: average number of uses per distinct word.
len(text3) / len(set(text3))

# Count occurrences of a word, and its percentage of all tokens.
text3.count("smote")
100 * text4.count("a") / len(text4)
# NLTK Book, Chapter 1: frequency distributions.
# (Reconstructed from a mangled paste; FreqDist and method names restored
# to their canonical nltk casing.)

fdist1 = FreqDist(text1)        # word-frequency distribution for text1
vocabulary = fdist1.keys()      # the distinct words (keys of the distribution)
fdist1['whale']                 # how many times 'whale' occurs

# Cumulative frequency plot of the 50 most frequent words.
fdist1.plot(50, cumulative=True)

V = set(text1)                  # vocabulary of text1
long_words = [w for w in V if len(w) > 15]  # words longer than 15 characters

# Collocations: pairs of words that occur together unusually often.
text4.collocations()

# Length of every token in text1.
[len(w) for w in text1]

# Distribution of word lengths (only a handful of distinct lengths exist).
fdist = FreqDist([len(w) for w in text1])
fdist

fdist.max()    # the most frequent word length
fdist.freq(3)  # relative frequency of length-3 words, as a fraction of all tokens
Notes from "Natural Language Processing with Python" (NLTK), Chapter 1.