A very simple crawler for http://www.tianqihoubao.com. You can change which cities and which months it scrapes; as written, it collects data for January through July 2018.
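Each month's history page lives at a predictable address of the form http://www.tianqihoubao.com/lishi/<city-pinyin>/month/<YYYYMM>.html, which is why the script below simply concatenates seven literal URLs. If you want a different range, a small generator is easier to edit. A minimal sketch (the month_urls helper and its defaults are mine, not part of the original script):

    # Hypothetical helper (not in the original script): build the month-page
    # URLs for one city slug, following the /lishi/<slug>/month/<YYYYMM>.html
    # pattern used in the script below.
    def month_urls(city_slug, year=2018, months=range(1, 8)):
        base = 'http://www.tianqihoubao.com/lishi/{0}/month/{1}{2:02d}.html'
        return [base.format(city_slug, year, m) for m in months]

    print(month_urls('kaiyuan'))  # seven URLs, 201801 through 201807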
from bs4 import BeautifulSoup
import requests
import pymysql
import warnings
# import pinyin
# from pinyin import Pinyin
from pypinyin import pinyin, lazy_pinyin
import pypinyin

warnings.filterwarnings("ignore")

conn = pymysql.connect(host='localhost', user='root', passwd='root',
                       db='test2', port=3306, charset='utf8')
cursor = conn.cursor()


def get_temperature(url, city):
    # Set the request headers so the site sees a normal browser
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/63.0.3239.132 Safari/537.36'}
    response = requests.get(url, headers=headers).content  # send a GET request
    soup = BeautifulSoup(response, "lxml")  # parse the page with BeautifulSoup
    conmid2 = soup.findAll('div', class_='wdetail')  # the history table lives in div.wdetail
    # conmid2 = conmid.findAll('div', class_='wdetail')
    for info in conmid2:
        tr_list = info.find_all('tr')[1:]  # slice off the first <tr> (the table header)
        for index, tr in enumerate(tr_list):  # enumerate yields both the position and the element
            td_list = tr.find_all('td')
            # if index == 0:
            # Take each cell's text and strip the newlines with replace()
            date = td_list[0].text.strip().replace("\n", "")
            # Day/night values are separated by "/"; keep the daytime value
            weather = td_list[1].text.strip().replace("\n", "").split("/")[0].strip()
            temperature = td_list[2].text.strip().replace("\n", "").split("/")[0].strip()
            wind = td_list[3].text.strip().replace("\n", "").split("/")[0].strip()
            # else:
            #     city_name = td_list[0].text.replace('\n', '')
            #     weather = td_list[4].text.replace('\n', '')
            #     wind = td_list[5].text.replace('\n', '')
            #     max = td_list[3].text.replace('\n', '')
            #     min = td_list[6].text.replace('\n', '')
            print(city, date, weather, wind, temperature)
            cursor.execute('insert into weather(city, date, weather, wind, temp) '
                           'values(%s, %s, %s, %s, %s)',
                           (city, date, weather, wind, temperature))


if __name__ == '__main__':
    # Earlier runs used other province lists (the original source holds these as
    # Chinese city names; they are romanized here):
    # Sichuan:
    # citys1 = ["Chengdu", "Guangyuan", "Mianyang", "Deyang", "Nanchong", "Guang'an",
    #           "Suining", "Neijiang", "Leshan", "Zigong", "Luzhou", "Yibin",
    #           "Panzhihua", "Bazhong", "Dazhou", "Ziyang", "Meishan", "Ya'an",
    #           "Chongzhou", "Qionglai", "Dujiangyan", "Pengzhou", "Jiangyou",
    #           "Shifang", "Guanghan", "Mianzhu", "Langzhong", "Huaying", "Emeishan",
    #           "Wanyuan", "Jianyang", "Xichang", "Kangding", "Ma'erkang", "Longchang"]
    # Henan:
    # citys1 = ["Zhengzhou", "Kaifeng", "Luoyang", "Pingdingshan", "Anyang", "Hebi",
    #           "Xinxiang", "Jiaozuo", "Puyang", "Xuchang", "Luohe", "Sanmenxia",
    #           "Nanyang", "Shangqiu", "Zhoukou", "Zhumadian", "Xinyang", "Xingyang",
    #           "Xinzheng", "Dengfeng", "Xinmi", "Yanshi", "Mengzhou", "Qinyang",
    #           "Weihui", "Huixian", "Linzhou", "Yuzhou", "Changge", "Wugang", "Yima",
    #           "Lingbao", "Xiangcheng", "Gongyi", "Dengzhou", "Yongcheng", "Ruzhou",
    #           "Jiyuan"]
    # Inner Mongolia:
    # citys1 = ["Hohhot", "Baotou", "Wuhai", "Chifeng", "Tongliao", "Ordos",
    #           "Hulunbuir", "Bayannur", "Ulanqab", "Holingol", "Manzhouli",
    #           "Yakeshi", "Zalantun", "Ergun", "Genhe", "Fengzhen", "Ulanhot",
    #           "Arxan", "Erenhot", "Xilinhot"]
    # Liaoning:
    # citys1 = ["Shenyang", "Dalian", "Anshan", "Fushun", "Benxi", "Dandong",
    #           "Jinzhou", "Yingkou", "Fuxin", "Liaoyang", "Panjin", "Tieling",
    #           "Chaoyang", "Huludao", "Xinmin", "Wafangdian", "Zhuanghe", "Haicheng",
    #           "Donggang", "Fengcheng", "Linghai", "Beizhen", "Gaizhou", "Dashiqiao",
    #           "Dengta", "Diaobingshan", "Kaiyuan", "Beipiao", "Lingyuan", "Xingcheng"]
    # citys1 = ["Huludao", "Xinmin", "Wafangdian", "Zhuanghe", "Haicheng", "Donggang",
    #           "Fengcheng", "Linghai", "Beizhen", "Gaizhou", "Dashiqiao", "Dengta",
    #           "Diaobingshan", "Kaiyuan", "Beipiao", "Lingyuan", "Xingcheng"]
    # The names must be Chinese characters for lazy_pinyin() below to work;
    # city[:-1] strips the trailing 市 ("city") suffix.
    citys1 = ["开原市", "北票市", "凌源市", "兴城市"]  # Kaiyuan, Beipiao, Lingyuan, Xingcheng
    for city in citys1:
        city1 = ''.join(lazy_pinyin(city[:-1]))  # build the pinyin slug used in the URL
        print(city1)
        urls = ['http://www.tianqihoubao.com/lishi/' + city1 + '/month/201801.html',
                'http://www.tianqihoubao.com/lishi/' + city1 + '/month/201802.html',
                'http://www.tianqihoubao.com/lishi/' + city1 + '/month/201803.html',
                'http://www.tianqihoubao.com/lishi/' + city1 + '/month/201804.html',
                'http://www.tianqihoubao.com/lishi/' + city1 + '/month/201805.html',
                'http://www.tianqihoubao.com/lishi/' + city1 + '/month/201806.html',
                'http://www.tianqihoubao.com/lishi/' + city1 + '/month/201807.html']
        for url in urls:
            get_temperature(url, city)
        conn.commit()  # commit once per city
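The INSERT statement assumes a weather table already exists in the test2 database, but the post never shows its definition. A schema that matches the five inserted columns might look like the sketch below; the column types and sizes are my guesses, not the author's actual DDL:

    import pymysql

    # Assumed schema (not shown in the original post): five text columns
    # matching the INSERT in get_temperature(). Types and sizes are guesses.
    ddl = """
    CREATE TABLE IF NOT EXISTS weather (
        city    VARCHAR(32),
        date    VARCHAR(32),
        weather VARCHAR(32),
        wind    VARCHAR(64),
        temp    VARCHAR(32)
    ) DEFAULT CHARSET = utf8
    """

    conn = pymysql.connect(host='localhost', user='root', passwd='root',
                           db='test2', port=3306, charset='utf8')
    with conn.cursor() as cursor:
        cursor.execute(ddl)
    conn.commit()
    conn.close()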