Function:
1. Obtain stock information for the Shanghai Stock Exchange (SSE) and the Shenzhen Stock Exchange (SZSE)
2. Save the output to a file
Technical route: requests - Beautiful Soup (bs4) - re
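Candidate data site selection:
1. Choose a static website: the information must be present statically in the HTML page, not generated by JavaScript code
2. Check this with the browser's F12 developer tools or by viewing the page source
Look for more than one source of information to try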
Method:
1. Get the stock list from Eastmoney (quote.eastmoney.com)
2. For each stock in the list, fetch its details from Baidu Stocks (gupiao.baidu.com)
3. Store the results in a file
# Stock crawler: collect stock codes from a listing page, then download each
# stock's detail page and append the parsed fields to a text file.
import re
import traceback

import requests
from bs4 import BeautifulSoup

# Matches exchange-prefixed codes such as "sh600000" or "sz000001".
_STOCK_CODE = re.compile(r"s[hz]\d{6}")


def gethtmltext(url, timeout=30):
    """Return the decoded body of *url*, or "" if the request fails.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so one
            unresponsive host cannot stall the whole crawl.

    Returns:
        The page text, or an empty string on any network or HTTP error.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        # Decode with the encoding guessed from the content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ""


def getstocklist(lst, stockurl):
    """Append every stock code found in the links of *stockurl* to *lst*.

    Args:
        lst: List that receives the codes; it is modified in place.
        stockurl: Address of the page whose <a> elements are scanned.
    """
    html = gethtmltext(stockurl)
    soup = BeautifulSoup(html, 'html.parser')
    for anchor in soup.find_all('a'):
        # Anchors without an href, or without a code in it, are skipped.
        match = _STOCK_CODE.search(anchor.get('href') or '')
        if match:
            lst.append(match.group(0))


def _parse_stock_page(html):
    """Extract the name and the dt/dd field pairs from one detail page."""
    soup = BeautifulSoup(html, 'html.parser')
    stockinfo = soup.find('div', attrs={'class': 'stock-bets'})
    if stockinfo is None:
        raise ValueError("page has no 'stock-bets' section")
    name = stockinfo.find_all(attrs={'class': 'bets-name'})[0]
    infodict = {'Stock name': name.text.split()[0]}
    keylist = stockinfo.find_all('dt')
    valuelist = stockinfo.find_all('dd')
    # Pair each label with its value; unpaired trailing entries are ignored.
    for key, val in zip(keylist, valuelist):
        infodict[key.text] = val.text
    return infodict


def getstockinfo(lst, stockurl, fpath):
    """Fetch the detail page of every stock in *lst* and append it to *fpath*.

    Args:
        lst: Stock codes; each is inserted into "<stockurl><code>.html".
        stockurl: Base address of the detail pages.
        fpath: Output file; one dict repr per stock is appended as UTF-8.

    Pages that cannot be downloaded are skipped. A page that cannot be
    parsed or written has its traceback printed and is then skipped.
    """
    for stock in lst:
        url = stockurl + stock + ".html"
        html = gethtmltext(url)
        if html == "":
            continue
        try:
            infodict = _parse_stock_page(html)
            # Append per stock so partial results survive an interruption.
            with open(fpath, 'a', encoding='utf-8') as f:
                f.write(str(infodict) + '\n')
        except Exception:
            # Exception (not a bare except) so Ctrl-C still stops the crawl.
            traceback.print_exc()
            continue


def main():
    """Crawl the stock list and write every stock's details to a file."""
    stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    stock_info_url = 'https://gupiao.baidu.com/stock/'
    output_file = 'd:/baidustockinfo.txt'
    slist = []
    getstocklist(slist, stock_list_url)
    getstockinfo(slist, stock_info_url, output_file)


if __name__ == "__main__":
    main()
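Improved code:
1. Add a progress display to improve the user experience
2. Pass the page encoding to gethtmltext explicitly (GB2312 for the stock list, UTF-8 by default) instead of detecting it with apparent_encoding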
# Stock crawler with an explicit page encoding and a progress display:
# collect stock codes from a listing page, then download each stock's detail
# page and append the parsed fields to a text file.
import re
import traceback

import requests
from bs4 import BeautifulSoup

# Matches exchange-prefixed codes such as "sh600000" or "sz000001".
_STOCK_CODE = re.compile(r"s[hz]\d{6}")


def gethtmltext(url, code="utf-8", timeout=30):
    """Return the body of *url* decoded as *code*, or "" if the request fails.

    Args:
        url: Address of the page to download.
        code: Encoding used to decode the response body.
        timeout: Seconds to wait for the server before giving up, so one
            unresponsive host cannot stall the whole crawl.

    Returns:
        The page text, or an empty string on any network or HTTP error.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        return ""


def getstocklist(lst, stockurl):
    """Append every stock code found in the links of *stockurl* to *lst*.

    Args:
        lst: List that receives the codes; it is modified in place.
        stockurl: Address of the page whose <a> elements are scanned; it is
            decoded as GB2312.
    """
    html = gethtmltext(stockurl, "GB2312")
    soup = BeautifulSoup(html, 'html.parser')
    for anchor in soup.find_all('a'):
        # Anchors without an href, or without a code in it, are skipped.
        match = _STOCK_CODE.search(anchor.get('href') or '')
        if match:
            lst.append(match.group(0))


def _parse_stock_page(html):
    """Extract the name and the dt/dd field pairs from one detail page."""
    soup = BeautifulSoup(html, 'html.parser')
    stockinfo = soup.find('div', attrs={'class': 'stock-bets'})
    if stockinfo is None:
        raise ValueError("page has no 'stock-bets' section")
    name = stockinfo.find_all(attrs={'class': 'bets-name'})[0]
    infodict = {'Stock name': name.text.split()[0]}
    keylist = stockinfo.find_all('dt')
    valuelist = stockinfo.find_all('dd')
    # Pair each label with its value; unpaired trailing entries are ignored.
    for key, val in zip(keylist, valuelist):
        infodict[key.text] = val.text
    return infodict


def getstockinfo(lst, stockurl, fpath):
    """Fetch the detail page of every stock in *lst* and append it to *fpath*.

    Args:
        lst: Stock codes; each is inserted into "<stockurl><code>.html".
        stockurl: Base address of the detail pages.
        fpath: Output file; one dict repr per stock is appended as UTF-8.

    A percentage is reprinted on one console line after every stock,
    whether it was saved, failed to parse, or could not be downloaded.
    """
    total = len(lst)
    for count, stock in enumerate(lst, start=1):
        html = gethtmltext(stockurl + stock + ".html")
        if html != "":
            try:
                infodict = _parse_stock_page(html)
                # Append per stock so partial results survive an interruption.
                with open(fpath, 'a', encoding='utf-8') as f:
                    f.write(str(infodict) + '\n')
            except Exception:
                # Best-effort crawl: a page that cannot be parsed or written
                # is skipped quietly so the progress line stays intact.
                # Exception (not a bare except) keeps Ctrl-C working.
                pass
        # "\r" returns to the line start so the percentage updates in place.
        print("\rCurrent progress: {:.2f}%".format(count * 100 / total), end="")


def main():
    """Crawl the stock list and write every stock's details to a file."""
    stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    stock_info_url = 'https://gupiao.baidu.com/stock/'
    output_file = 'd:/baidustockinfo.txt'
    slist = []
    getstocklist(slist, stock_list_url)
    getstockinfo(slist, stock_info_url, output_file)


if __name__ == "__main__":
    main()