Targeted Crawler for Stock Data
(1) Objective: obtain the names and trading information of all stocks listed on the Shanghai Stock Exchange (codes prefixed "sh") and the Shenzhen Stock Exchange (codes prefixed "sz")
Sina Stock: http://finance.sina.com.cn/stock/
Baidu Stock: https://gupiao.baidu.com/stock/
Selection principle: the stock information must exist statically in the HTML page (not generated by JavaScript code),
and the site's robots.txt must not restrict crawling.
Selection method: use the browser's F12 developer tools, view the page source, etc.
Selection mindset: do not get stuck on a single website; try several information sources
Getting the list of stocks:
Eastmoney (quote.eastmoney.com): http://quote.eastmoney.com/stocklist.html
Get stock Information:
Baidu Stock: https://gupiao.baidu.com/stock/
Single stock: https://gupiao.baidu.com/stock/sz002439.html
(2) Structural design of the program
Step 1: Get the stock list from Eastmoney (quote.eastmoney.com)
Code for getting the stock list:
def getstocklist(lst, stockurl):
    """Collect stock codes from the stock-list page into ``lst``.

    Args:
        lst: List that receives the stock codes; it is extended in place.
        stockurl: URL of the page that lists the stocks.

    The page is fetched with ``gethtmltext`` using the GB2312 encoding, and
    every ``<a>`` tag is inspected.  A stock code is the first substring of
    the tag's ``href`` made of ``s``, then ``h`` or ``z``, then six digits
    (for example ``sz002439``).  Anchors with no ``href`` or with no stock
    code in it are skipped.
    """
    html = gethtmltext(stockurl, "GB2312")
    soup = BeautifulSoup(html, "html.parser")
    for anchor in soup.find_all("a"):
        # Look the attribute up with a default instead of relying on an
        # exception for anchors that carry no href.
        found = re.search(r"s[hz]\d{6}", anchor.attrs.get("href", ""))
        if found:
            lst.append(found.group(0))
Step 2: For each stock in the list, fetch that stock's information from Baidu Stock
Code:
def getstockinfo(lst, stockurl, fpath):
    """Fetch the page of every stock in ``lst`` and append its data to a file.

    Args:
        lst: List of stock codes.
        stockurl: Base URL of the stock site; each page address is built as
            ``stockurl + code + ".html"``.
        fpath: Path of the output file.  One ``str(dict)`` line is appended
            per stock that was parsed successfully.

    A stock whose page could not be downloaded (``gethtmltext`` returned an
    empty string) is skipped.  A stock whose page cannot be parsed has its
    traceback printed and is skipped as well.  Progress is reported for every
    stock, including skipped ones, so the percentage always ends at 100.
    """
    total = len(lst)
    for done, stock in enumerate(lst, start=1):
        html = gethtmltext(stockurl + stock + ".html")
        if html != "":
            try:
                # Record all information of this stock.
                infodict = {}
                soup = BeautifulSoup(html, "html.parser")
                stockinfo = soup.find("div", attrs={"class": "stock-bets"})

                # Stock name: first whitespace-separated token of the
                # element with class "bets-name".
                name = stockinfo.find_all(attrs={"class": "bets-name"})[0]
                infodict.update({"stock name": name.text.split()[0]})

                # Remaining fields: each <dt> is a key, the <dd> at the same
                # position is its value.
                keylist = stockinfo.find_all("dt")
                valuelist = stockinfo.find_all("dd")
                for key_tag, value_tag in zip(keylist, valuelist):
                    infodict[key_tag.text] = value_tag.text

                # Append the record to the output file.
                with open(fpath, "a", encoding="utf-8") as f:
                    f.write(str(infodict) + "\n")
            except Exception:
                # Print the traceback to see which line failed, then go on
                # with the next stock.  Unlike a bare ``except`` this lets
                # Ctrl-C interrupt the crawl.
                traceback.print_exc()
        # Dynamic progress indicator: "\r" rewrites the same console line.
        print("\rCurrent progress: {:.2f}%".format(done * 100 / total), end="")
Step 3: Store the results in a file
(3) Overall code
# crawbaidustocksb.py
import re
import traceback

import requests
from bs4 import BeautifulSoup


def gethtmltext(url, code="utf-8"):
    """Download ``url`` and return its text decoded with ``code``.

    Args:
        url: Address of the page to download.
        code: Encoding used to decode the response body.

    Returns:
        The page text, or an empty string when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        # A timeout keeps one unresponsive server from stalling the crawl.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        return ""


def getstocklist(lst, stockurl):
    """Collect stock codes from the stock-list page into ``lst``.

    Args:
        lst: List that receives the stock codes; it is extended in place.
        stockurl: URL of the page that lists the stocks.

    Every ``<a>`` tag of the page is inspected.  A stock code is the first
    substring of the tag's ``href`` made of ``s``, then ``h`` or ``z``, then
    six digits.  Anchors with no ``href`` or with no stock code in it are
    skipped.
    """
    html = gethtmltext(stockurl, "GB2312")
    soup = BeautifulSoup(html, "html.parser")
    for anchor in soup.find_all("a"):
        # Look the attribute up with a default instead of relying on an
        # exception for anchors that carry no href.
        found = re.search(r"s[hz]\d{6}", anchor.attrs.get("href", ""))
        if found:
            lst.append(found.group(0))


def getstockinfo(lst, stockurl, fpath):
    """Fetch the page of every stock in ``lst`` and append its data to a file.

    Args:
        lst: List of stock codes.
        stockurl: Base URL of the stock site; each page address is built as
            ``stockurl + code + ".html"``.
        fpath: Path of the output file.  One ``str(dict)`` line is appended
            per stock that was parsed successfully.

    A stock whose page could not be downloaded is skipped.  A stock whose
    page cannot be parsed has its traceback printed and is skipped as well.
    Progress is reported for every stock, including skipped ones.
    """
    total = len(lst)
    for done, stock in enumerate(lst, start=1):
        html = gethtmltext(stockurl + stock + ".html")
        if html != "":
            try:
                # Record all information of this stock.
                infodict = {}
                soup = BeautifulSoup(html, "html.parser")
                stockinfo = soup.find("div", attrs={"class": "stock-bets"})

                # Stock name: first whitespace-separated token of the
                # element with class "bets-name".
                name = stockinfo.find_all(attrs={"class": "bets-name"})[0]
                infodict.update({"stock name": name.text.split()[0]})

                # Remaining fields: each <dt> is a key, the <dd> at the same
                # position is its value.
                keylist = stockinfo.find_all("dt")
                valuelist = stockinfo.find_all("dd")
                for key_tag, value_tag in zip(keylist, valuelist):
                    infodict[key_tag.text] = value_tag.text

                # Append the record to the output file.
                with open(fpath, "a", encoding="utf-8") as f:
                    f.write(str(infodict) + "\n")
            except Exception:
                # Print the traceback to see which line failed, then go on
                # with the next stock.  Unlike a bare ``except`` this lets
                # Ctrl-C interrupt the crawl.
                traceback.print_exc()
        # Dynamic progress indicator: "\r" rewrites the same console line.
        print("\rCurrent progress: {:.2f}%".format(done * 100 / total), end="")


def main():
    """Crawl the stock list, then store every stock's information on disk."""
    # URL of the stock list.
    stock_list_url = "http://quote.eastmoney.com/stocklist.html"
    # Base URL of the per-stock information pages.
    stock_info_url = "https://gupiao.baidu.com/stock/"
    # Save to the root directory of drive E:.
    output_file = "E:/baidustockinfo.txt"
    slist = []
    getstocklist(slist, stock_list_url)
    getstockinfo(slist, stock_info_url, output_file)


if __name__ == "__main__":
    main()
(4) Output Results
{' Yesterday's receipt ': ' 1.14 ', ' turnover ': ' 3612 hands ', ' stock name ': ' Long letter in the Certificate of Energy ', ' discount rate ': '-0.62 ', ' turnover ': ' 418,000 ', ' Now open ': ' 1.14 ', ' net ': ' 1.1360 ', ' Max ': ' 1.20 ', ' lowest ': ' 1.12 '}
{' Yesterday's receipt ': ' 1.03 ', ' turnover ': ' 651 hands ', ' stock name ': ' Long-letter optimization ', ' discount rate ': ' 2.40 ', ' turnover ': ' 66,800 ', ' open ': ' 1.03 ', ' net ': ' 1.0029 ', ' highest ': ' 1.03 ' , ' lowest ': ' 1.03 '}
{' Yesterday's Receipt ': ' 1.00 ', ' turnover ': ' 766 hands ', ' stock name ': ' Precision medical ', ' discount rate ': ' 0.74 ', ' turnover ': ' 77,000 ', ' Open ': ' 1.00 ', ' net ': ' 1.0006 ', ' highest ': ' 1.01 ' , ' lowest ': ' 1.00 '}
{' Yesterday's receipt ': ' 0.95 ', ' turnover ': ' 74 hands ', ' stock name ': ' Interconnected medical ', ' discount rate ': ' 0.64 ', ' turnover ': ' 7123 ', ' Now open ': ' 0.95 ', ' net ': ' 0.9459 ', ' highest ': ' 0.95 ', ' Minimum ': ' 0.95 '}
{' Yesterday's receipt ': ' 0.94 ', ' turnover ': ' 1 hands ', ' stock name ': ' Interconnection medical C ', ' Discount rate ': ' 1.13 ', ' turnover ': ' 95 ', ' Now open ': ' 0.95 ', ' net ': ' 0.9433 ', ' highest ': ' 0.95 ', ' most Low ': ' 0.95 '}
{' Yesterday's receipt ': ' 1.22 ', ' turnover ': ' 2424 hands ', ' stock name ': ' Biotech ', ' discount rate ': '-0.21 ', ' turnover ': ' 292,700 ', ' Now open ': ' 1.22 ', ' net ': ' 1.2075 ', ' Max ': ' 1.22 ', ' lowest ': ' 1.21 '}
{' Yesterday's receipt ': ' 1.21 ', ' turnover ': ' 1284 hands ', ' stock name ': ' Bio C ', ' Discount rate ': '-0.68 ', ' turnover ': ' 153,800 ', ' Now open ': ' 1.21 ', ' net ': ' 1.2062 ', ' highest ': ' 1.21 ', ' lowest ': ' 1.19 '}
{' Yesterday's receipt ': ' 1.08 ', ' turnover ': ' 125 hands ', ' stock name ': ' Chinese Medicine Fund ', ' discount rate ': ' 1.26 ', ' turnover ': ' 13,400 ', ' Now open ': ' 1.07 ', ' net ': ' 1.0715 ', ' highest ': ' 1.09 ' , ' lowest ': ' 1.06 '}
{' Yesterday's receipt ': ' 1.07 ', ' turnover ': ' 1794 hands ', ' stock name ': ' Chinese Medicine c ', ' Discount rate ': ' 0.62 ', ' turnover ': ' 192600 ', ' open ': ' 1.07 ', ' net ': ' 1.0694 ', ' highest ': ' 1.08 ', ' lowest ': ' 1.07 '}
{' Yesterday's receipt ': ' 0.97 ', ' turnover ': ' 26,900 hands ', ' stock name ': ' LOF upgrade ', ' discount rate ': '-1.13 ', ' turnover ': ' 2.5998 million ', ' open ': ' 0.97 ', ' Net worth ': ' 0.9760 ', ' most High ': ' 0.97 ', ' lowest ': ' 0.96 '}
{' Yesterday's receipt ': ' 1.07 ', ' turnover ': ' 532 hands ', ' stock name ': ' Brokerage fund ', ' discount rate ': ' 0.23 ', ' turnover ': ' 56,600 ', ' open ': ' 1.06 ', ' net ': ' 1.0636 ', ' highest ': ' 1.07 ' , ' lowest ': ' 1.06 '}
{' Yesterday's receipt ': ' 0.94 ', ' turnover ': ' 1507 hands ', ' stock name ': ' Guotai Rong Feng ', ' discount rate ': '-2.77 ', ' turnover ': ' 141,400 ', ' Now open ': ' 0.94 ', ' net ': ' 0.9647 ', ' highest ': ' 0.94 ', ' lowest ': ' 0.94 '}
{' Yesterday's Receipt ': ' 1.00 ', ' turnover ': ' 9417 hands ', ' stock name ': ' Southern Crude ', ' discount rate ': '-1.39 ', ' turnover ': ' 943,300 ', ' Now open ': ' 1.00 ', ' net ': ' 1.0141 ', ' highest ': ' 1.00 ', ' lowest ': ' 1.00 '}
{' Yesterday's receipt ': ' 0.86 ', ' turnover ': ' 2476 hands ', ' stock name ': ' Military fund ', ' discount rate ': ' 0.04 ', ' turnover ': ' 212,000 ', ' open ': ' 0.85 ', ' net ': ' 0.8567 ', ' highest ': ' 0.86 ', ' lowest ': ' 0.85 '}
{' Yesterday's receipt ': ' 1.10 ', ' turnover ': ' 154 hands ', ' stock name ': ' The state-owned enterprise change ', ' discount rate ': ' 0.43 ', ' turnover ': ' 17,000 ', ' Now open ': ' 1.10 ', ' net ': ' 1.1058 ', ' highest ': ' 1.10 ', ' most Low ': ' 1.10 '}