Wrote a simple web crawler:
# coding=utf-8
"""Simple crawler for weather.com.cn: prints province, city and low temperature."""
from bs4 import BeautifulSoup
import requests

URL = "http://www.weather.com.cn/textFC/hb.shtml"


def get_temperature(url):
    """Fetch the regional forecast table at *url* and print one line per city.

    Each printed line is: province, city, minimum temperature.

    :param url: a weather.com.cn textFC region page,
                e.g. http://www.weather.com.cn/textFC/hb.shtml
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'http://www.weather.com.cn/weather1d/10129160502A.shtml',
        'Host': 'www.weather.com.cn',
    }
    res = requests.get(url, headers=headers)
    # Decode the raw bytes explicitly; the site serves UTF-8.
    content = res.content.decode('utf-8')

    soup = BeautifulSoup(content, 'lxml')
    # NOTE(review): class names reconstructed as 'conMidtab'/'conMidtab2',
    # the markup weather.com.cn actually uses — confirm against a live page.
    conmidtab = soup.find('div', class_='conMidtab')
    conmidtab2_list = conmidtab.find_all('div', class_='conMidtab2')

    for table in conmidtab2_list:
        # The first two <tr> rows are table headers; skip them.
        tr_list = table.find_all('tr')[2:]
        province = ''
        for index, tr in enumerate(tr_list):
            td_list = tr.find_all('td')
            if index == 0:
                # First data row carries the province cell (rowspan), shifting
                # the city/temperature columns right by one.
                province = td_list[0].text.replace('\n', '')
                city = td_list[1].text.replace('\n', '')
                min_temp = td_list[7].text.replace('\n', '')
            else:
                city = td_list[0].text.replace('\n', '')
                min_temp = td_list[6].text.replace('\n', '')
            print(province, city, min_temp)


get_temperature(URL)
Python — web crawler