"""Scrape per-stock quote details and append them to a text file.

Stock codes are collected from the anchors of a listing page. Each stock's
detail page is then fetched, and its name plus every <dt>/<dd> pair is
written to the output file as one dict-per-line record.
"""

import re
import traceback

import requests
from bs4 import BeautifulSoup

# Exchange prefix ("sh" or "sz") followed by a six-digit stock code.
_STOCK_CODE_RE = re.compile(r"[s][hz]\d{6}")

# Seconds to wait on a request before giving up; requests never times out
# unless a timeout is passed explicitly.
_DEFAULT_TIMEOUT = 30


def gethtmltext(URL, code="utf-8", timeout=_DEFAULT_TIMEOUT):
    """Download a page and return its body decoded with the given encoding.

    Args:
        URL: Address of the page to fetch.
        code: Encoding name used to decode the response body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded page text, or "" if the request failed or the server
        answered with an HTTP error status.
    """
    try:
        response = requests.get(URL, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "page unavailable".
        return ""
    response.encoding = code
    return response.text


def getstocklist(list, stockurl):
    """Append every stock code linked from the listing page to ``list``.

    Args:
        list: Mutable list that receives the codes, in page order
            (duplicates are kept). The name shadows the builtin but is
            kept so existing callers are unaffected.
        stockurl: Address of the listing page, decoded as GB2312.
    """
    html = gethtmltext(stockurl, "GB2312")
    print("getstocklist start")
    soup = BeautifulSoup(html, "html.parser")
    for anchor in soup.find_all("a"):
        href = anchor.attrs.get("href")
        if not href:
            continue
        match = _STOCK_CODE_RE.search(href)
        if match:
            list.append(match.group(0))


def _parse_stock_page(html):
    """Extract the stock name and its dt/dd fields from a detail page.

    Returns:
        A dict with the key 'Stock name' followed by one entry per
        <dt>/<dd> pair, or None when the page lacks the expected
        'stock-bets' block or a usable name.
    """
    soup = BeautifulSoup(html, "html.parser")
    stock_info = soup.find("div", attrs={"class": "stock-bets"})
    if stock_info is None:
        return None

    name_tag = stock_info.find(attrs={"class": "bets-name"})
    if name_tag is None:
        return None
    name_parts = name_tag.get_text().split()
    if not name_parts:
        return None

    info = {"Stock name": name_parts[0]}
    # Labels and values are matched by position; unmatched extras are ignored.
    labels = stock_info.find_all("dt")
    values = stock_info.find_all("dd")
    info.update((label.text, value.text) for label, value in zip(labels, values))
    return info


def getstockinfo(list, stockurl, filepath):
    """Fetch each stock's detail page and append its fields to ``filepath``.

    One ``str(dict)`` record is written per line for every stock whose page
    could be downloaded and parsed; other stocks are skipped. A single-line
    progress percentage is printed after every stock, skipped or not.

    Args:
        list: Stock codes to look up. The name shadows the builtin but is
            kept so existing callers are unaffected.
        stockurl: Base address; the page for a code is
            ``stockurl + code + ".html"``.
        filepath: Output file, opened in append mode as UTF-8.

    Raises:
        OSError: If ``filepath`` cannot be opened or written.
    """
    total = len(list)
    if total == 0:
        return

    with open(filepath, "a", encoding="utf-8") as out:
        for count, stock in enumerate(list, start=1):
            html = gethtmltext(stockurl + stock + ".html")
            info = _parse_stock_page(html) if html else None
            if info is not None:
                out.write(str(info) + "\n")
            # The carriage return keeps the progress on one console line.
            print("\rCurrent progress: {:.2f}%".format(count * 100 / total), end="")


def main():
    """Collect the stock codes, then dump every stock's details to a file."""
    print("Start")
    stock_list_url = "http://quote.eastmoney.com/stocklist.html"
    stock_info_url = "https://gupiao.baidu.com/stock/"
    output_file = "d:/baidustockinfo.txt"
    slist = []
    getstocklist(slist, stock_list_url)
    getstockinfo(slist, stock_info_url, output_file)
    print("End")


if __name__ == "__main__":
    main()