# -*- coding: utf-8 -*-
"""
Created on Wed Sep 6 22:21:09

@author: Administrator

Introductory NLTK walkthrough (NLTK book, Chapter 1): searching text,
measuring vocabulary, frequency distributions, and simple collocations.
Reconstructed from a garbled scrape; statements mirror the canonical
nltk.book examples. Requires `nltk` with the book corpora downloaded
(`nltk.download('book')`).
"""
import nltk
from nltk.book import *  # loads text1..text9, sent1..sent9


def lexical_diversity(text):
    """Return the ratio of total tokens to distinct tokens.

    A higher value means words are repeated more often.
    NOTE: true division — on Python 2 this file's original would have
    used integer division; keep inputs/expectations consistent.
    """
    return len(text) / len(set(text))


def percentage(count, total):
    """Return `count` as a percentage of `total` (0-100 scale)."""
    return 100 * count / total


# --- Searching text -------------------------------------------------------
# Show every occurrence of a word with its surrounding context.
text1.concordance('monstrous')

# Words appearing in similar contexts to the given word.
text1.similar('monstrous')

# Contexts shared by two or more words.
text2.common_contexts(['monstrous', 'very'])

# Positional dispersion plot of selected words through the text.
text4.dispersion_plot(['monstrous', 'very'])

# --- Counting vocabulary --------------------------------------------------
# Total number of tokens in the text.
len(text3)

# Repetition density: tokens per distinct word type.
len(text3) / len(set(text3))

# Keyword counts and keyword density.
text3.count('smote')
100 * text4.count('a') / len(text4)

# A hand-built sentence (opening of Moby Dick).
sent1 = ['Call', 'me', 'Ishmael', '.']

# --- Indexing -------------------------------------------------------------
# Word at a position (indexing starts at zero) and position of a word.
text3[172]
text3.index('love')

# --- Frequency distributions ----------------------------------------------
# FreqDist maps each word type to its number of occurrences.
fdist1 = FreqDist(text1)
vocabulary1 = fdist1.keys()
fdist1['whale']
fdist1.plot(cumulative=True)

# Hapaxes: words that occur exactly once.
fdist1.hapaxes()

# --- Fine-grained word selection ------------------------------------------
# All word types longer than 15 characters.
V = set(text1)
long_words = [w for w in V if len(w) > 15]
sorted(long_words)

# Select by length AND frequency together.
fdist5 = FreqDist(text5)
sorted([w for w in set(text5) if len(w) > 7 and fdist5[w] > 7])

# --- Collocations ---------------------------------------------------------
# Adjacent word pairs (bigrams).
from nltk.util import bigrams
list(bigrams(['more', 'is', 'said', 'than', 'done']))

# Frequent bigrams that occur more often than chance.
text4.collocations()

# --- Word-length distribution ---------------------------------------------
# Length of every token in the text.
[len(w) for w in text1]

# Frequency distribution over word lengths (length -> count).
fdist = FreqDist([len(w) for w in text1])
fdist.keys()    # the observed lengths
fdist.items()   # (length, count) pairs
fdist.max()     # the most common word length
fdist[3]        # how many 3-letter words occur
NLP-Python Natural Language Processing 01