1. The default city is Hangzhou; the code is as follows:
# -*- coding: utf-8 -*-
"""Simple crawler: fetches job listings from BOSS Zhipin and writes them to Excel."""
from urllib import request, parse
from bs4 import BeautifulSoup
import datetime
import xlwt

starttime = datetime.datetime.now()  # start timestamp, used to report total runtime
# BOSS Zhipin job-search URL; scity=101210100 is the city code for Hangzhou (the default city)
url = r'https://www.zhipin.com/job_detail/?scity=101210100'
def read_page(url, page_num, keyword):
    """Fetch one page of search results, mimicking a browser request.

    url: base search URL of the site.
    page_num: 1-based result-page index.
    keyword: job title / query string to search for.
    Returns the page HTML decoded as a UTF-8 str.
    """
    page_headers = {
        'Host': 'www.zhipin.com',
        # Spoof a desktop Chrome browser so the site serves normal HTML
        # (value is one string, implicitly concatenated across two lines)
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/45.0.2454.85 Safari/537.36 115Browser/6.0.3',
        'Connection': 'keep-alive'
    }
    # Request parameters expected by the site's search endpoint
    page_data = parse.urlencode([
        ('ka', 'page-' + str(page_num)),
        ('page', page_num),
        ('query', keyword),
    ])
    req = request.Request(url, headers=page_headers)
    # POSTing the urlencoded parameters; response bytes are decoded as UTF-8
    page = request.urlopen(req, data=page_data.encode('utf-8')).read()
    return page.decode('utf-8')
if __name__ == '__main__':
    print('********************************** is about to crawl **********************************')
    keyword = input('Please enter the position you want to search for: ')
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('Sheet1')
    i = 0  # next row index in the output sheet
    # Crawl result pages 1-4 and write every company entry into column 0
    for j in range(1, 5):
        # 'html.parser' is named explicitly so bs4 does not guess (and warn about) the parser
        soup = BeautifulSoup(read_page(url, j, keyword), 'html.parser')
        for link in soup.select('.company-text'):
            sheet.write(i, 0, link.get_text())
            i = i + 1
    workbook.save("D:\\resultslatest.xls")
    endtime = datetime.datetime.now()
    time = (endtime - starttime).seconds
    print('Total time: %s s' % time)
2. Crawl results
A simple Python crawler that scrapes BOSS Zhipin job listings and writes them to an Excel file.