# coding:utf-8
# Count how often each word occurs in an English text file and print the
# counts, most frequent first, skipping common English stop words.
import operator

from nltk.corpus import stopwords  # natural-language processing toolkit

# In[32]:
# Optional: fetch the text from the web instead of reading a local file.
# import requests
# from bs4 import BeautifulSoup
# res = requests.get("http://www.guancha.cn/America/2017_01_21_390488_s.shtml")
# res.encoding = 'utf-8'
# soup = BeautifulSoup(res.text, 'lxml')

# In[66]:
# An English text document must be present, of course.
# NOTE(review): the file name's case is kept exactly as found - confirm it
# matches the file on disk.
with open("Speech.txt", 'r', encoding='utf-8') as speech_file:
    speech_new = speech_file.read()

# lower() turns every uppercase letter into lowercase; split() with no
# argument splits the string on whitespace.
speech = speech_new.lower().split()

# In[70]:
# Manual frequency table: a word not yet in the dict starts at 1, a word
# already present has its count increased by 1.
dic = {}
for i in speech:
    dic[i] = dic.get(i, 0) + 1

# In[68]:
# dic.items() yields (word, count) tuples; itemgetter(1) sorts by the count
# (index 1 of each tuple) and reverse=True puts the largest counts first.
# Bound to `sorted_counts` rather than `list` so the builtin is not shadowed.
sorted_counts = sorted(dic.items(), key=operator.itemgetter(1), reverse=True)

# In[94]:
# English stop words. The NLTK "stopwords" corpus must be installed
# (e.g. via nltk.download('stopwords')).
stop_words = stopwords.words('english')

# In[103]:
# Unpack each tuple: element 0 goes to k (the word), element 1 to v (its count).
stop_word_set = set(stop_words)  # constant-time membership tests in the loop
for k, v in sorted_counts:
    if k not in stop_word_set:
        print(k, v)
However, Python 3 ships with something far more convenient for this job: `Counter` from the `collections` module.
# In[108]:
# Counter is a standard-library data structure added after Python 2.6.
from collections import Counter

# Build the word -> occurrence-count table in a single call.
c = Counter(speech)

# In[111]:
# The ten most frequent words, stop words still included (shown as the cell
# output in a notebook; wrap in print() when running as a plain script).
c.most_common(10)

# In[113]:
# Remove the stop words from the counter. Deleting a key that is not present
# in a Counter does not raise, so no membership check is needed.
for sw in stop_words:
    del c[sw]

# In[114]:
# The ten most frequent words once the stop words have been removed.
c.most_common(10)
As you can see, producing this kind of word-frequency statistic is very simple.
How to count the most common words in an English document with Python 3 (with explanations)