"""Scrape Hangzhou job listings for a user-supplied position from lagou.com.

Posts to Lagou's positionAjax endpoint through fixed proxy servers, works
out how many result pages exist, then walks every page and prints one
summary line per job posting.
"""
import json
import math
import time

import requests

# Lagou AJAX search endpoint; city is fixed to %E6%9D%AD%E5%B7%9E (Hangzhou).
url = 'https://www.lagou.com/jobs/positionAjax.json?city=%E6%9D%AD%E5%B7%9E&needAddtionalResult=false'

headers = {
    'user-agent': ('mozilla/5.0 (Windows NT 10.0; WOW64) '
                   'applewebkit/537.36 (khtml, like Gecko) '
                   'chrome/58.0.3029.110 safari/537.36'),
}

# Proxy servers routed through for every request (anti-ban measure).
prox = {
    'http': 'http://47.89.48.239:808',
    'https': 'https://47.52.3.154:808',
}


def get_page(position):
    """Return the total number of result pages for *position*.

    Fetches page 1 to read ``totalCount`` and ``resultSize`` from the JSON
    response, then rounds up so a partial final page still counts.

    :param position: search keyword typed by the user.
    :raises KeyError: if the response JSON lacks the expected structure
        (e.g. when Lagou serves an anti-crawler page instead of data).
    """
    data = {'first': 'false', 'pn': '1', 'kd': position}
    resp = requests.post(url, headers=headers, data=data, proxies=prox)
    payload = json.loads(resp.text)
    result = payload['content']['positionResult']
    # BUG FIX: the original took the last character of str(total/size) and
    # compared that *string* to the int 0 — always unequal, so an extra
    # (empty) page was requested whenever totalCount divided evenly by
    # resultSize.  math.ceil implements "round up partial pages" directly.
    return math.ceil(result['totalCount'] / result['resultSize'])


def get_job_message(data):
    """Fetch one result page and print a summary line per job posting.

    :param data: form payload (``first``/``pn``/``kd``) for positionAjax.
    """
    resp = requests.post(url, headers=headers, data=data, proxies=prox)
    if resp.status_code == 200:
        payload = json.loads(resp.text)
        for job in payload['content']['positionResult']['result']:
            print(job['companyFullName'],   # company name
                  job['companySize'],       # company size
                  job['positionName'],      # job title
                  job['salary'],            # pay range
                  job['workYear'],          # required experience
                  job['district'])          # work district
    else:
        print('no page data obtained')


def main():
    """Prompt for a position, then crawl and print every result page."""
    position = input('Enter the position you want to search for:')
    for pn in range(1, get_page(position) + 1):
        data = {'first': 'false', 'pn': str(pn), 'kd': position}
        print(data)
        get_job_message(data)
        time.sleep(3)  # throttle requests to dodge the site's rate limiting


if __name__ == '__main__':
    main()
Python script that sets a proxy IP to crawl job information from the web.