Using Python to obtain job-market recruitment information: a first attempt (Python, recruitment information)
This article describes how to fetch pages from a website with Python and how to extract recruitment (job-listing) information from them. The site is referred to here as www.net.com; the code below requests www.ganji.com. The key Python code for obtaining the recruitment information is as follows:
"""Scrape recruitment (job-listing) categories from www.ganji.com.

Flow of the script:
    1. download the site index and build a ``{city name: city URL}`` map;
    2. download the recruitment page of one city;
    3. build a ``{job category: category URL}`` map from that page;
    4. download one category page and print its first job listing.

All HTML parsing is done with regular expressions, so the patterns are
tied to the page layout the script was written against.
"""
import re
import urllib
import urllib.request

# Browser identity sent with every request so the site serves the normal
# desktop page instead of rejecting the default urllib client.
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36')

# One complete <a ...>...</a> element (\b keeps <abbr>, <article>, ... out).
_ANCHOR_RE = re.compile(r'(<a\b.*?</a>)', re.S)
# Text between the end of the opening tag and the next closing tag.
_ANCHOR_TEXT_RE = re.compile(r'>(.*?)</', re.S)


def _first_match(pattern, text, what):
    """Return the first capture of *pattern* in *text*.

    Raises:
        IndexError: if the pattern does not match. The type is the one a
            bare ``re.findall(...)[0]`` raises, but the message names the
            page element (*what*) that was missing.
    """
    matches = re.findall(pattern, text, re.S)
    if not matches:
        raise IndexError('no {} found in page'.format(what))
    return matches[0]


def _links(fragment, url_pattern):
    """Yield ``(link text, url)`` for every ``<a>`` element in *fragment*.

    *url_pattern* must contain one capture group selecting the URL.
    Anchors lacking either a text node or a matching URL are skipped so a
    single malformed link does not abort the whole parse.
    """
    for anchor in _ANCHOR_RE.findall(fragment):
        text = _ANCHOR_TEXT_RE.search(anchor)
        target = re.search(url_pattern, anchor, re.S)
        if text is None or target is None:
            continue
        yield text.group(1), target.group(1)


def begin(url, timeout=30.0):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: address of the page to fetch.
        timeout: seconds to wait on the connection before giving up.

    Raises:
        urllib.error.URLError: on network or HTTP failures.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    opener = urllib.request.build_opener()
    # Disguise the request as coming from a desktop Chrome browser.
    opener.addheaders = [('User-Agent', USER_AGENT)]
    # The context manager closes the connection even if read() fails.
    with opener.open(url, timeout=timeout) as response:
        raw = response.read()
    return raw.decode('utf-8')


def get_cityinfo(data):
    """Return ``{city name: city URL}`` parsed from the index page HTML.

    Only the first ``<dl>...</dl>`` block of *data* is inspected.

    Raises:
        IndexError: if *data* contains no ``<dl>`` block.
    """
    city_block = _first_match(r'<dl>(.*?)</dl>', data, 'city list (<dl>)')
    return dict(_links(city_block, r'href="(.*?)"'))


def a_info(data):
    """Return ``{link text: first quoted attribute value}`` for ``data[0]``.

    *data* is a sequence whose first item is an HTML fragment (the shape
    ``re.findall`` returns). The value stored for each link is the first
    double-quoted string inside the anchor, whatever attribute it is.

    Raises:
        IndexError: if *data* is empty.
    """
    return dict(_links(data[0], r'"(.*?)"'))


def get_cityinfoclass():
    """Return the URL path segment of the listing class to scrape.

    Only recruitment (``'zhaopin/'``) is supported for now.
    """
    return 'zhaopin/'


def getzhaopin(city_info, infoclass, city='chengdu'):
    """Download the listing page of *infoclass* for one city.

    Args:
        city_info: ``{city name: city URL}`` map from ``get_cityinfo``.
        infoclass: path segment from ``get_cityinfoclass``.
        city: key to look up in *city_info*.
            NOTE(review): the key must equal the link text on the live
            index page, which may be the Chinese city name - confirm.

    Returns:
        ``(city_url, page_html)``.

    Raises:
        KeyError: if *city* is not present in *city_info*.
    """
    city_url = city_info[city]
    cdzp_info = begin(city_url + infoclass)
    return city_url, cdzp_info


def complete(city_url, cdzp_info):
    """Return ``{job category: absolute category URL}`` for a city page.

    Categories are the links found inside the ``<dd>`` elements of the
    first element whose class is ``f-all-news``. Each link's
    root-relative ``href`` is appended to *city_url*, which is therefore
    expected to end with ``/``.

    Raises:
        IndexError: if the ``f-all-news`` section is missing.
    """
    section = _first_match(r'class="f-all-news"(.*?)</div>', cdzp_info,
                           'category section (class="f-all-news")')
    a_dict = {}
    for entry in re.findall(r'<dd>(.*?)</dd>', section, re.S):
        for text, path in _links(entry, r'href="/(.*?)"'):
            a_dict[text.strip()] = city_url + path
    return a_dict


def get_city_zpinfo_detail(url, job_class='Software Engineer'):
    """Download one job-category page and print its first listing.

    Args:
        url: despite the name, the ``{job category: URL}`` map returned
            by ``complete`` (the name is kept for compatibility). It is
            read from the argument rather than from a module global.
        job_class: category key to look up.
            NOTE(review): must equal the category text on the live page,
            which may be in Chinese - confirm.

    Raises:
        KeyError: if *job_class* is not a key of *url*.
    """
    job_url_info = begin(url[job_class])
    get_detail_info(job_url_info)


def get_detail_info(list_info):
    """Print the first job entry found in the category page HTML.

    Raises:
        IndexError: if the page contains no job-list ``<dl>`` element.
    """
    job_info = _first_match(
        r'<dl class="list-noimg job-list clearfix"(.*?)</dl',
        list_info, 'job listing')
    print(job_info)


def main():
    """Run the full scrape: index -> city -> categories -> one listing."""
    url = 'http://www.ganji.com/index.htm'
    data = begin(url)
    # All city information.
    city_info = get_cityinfo(data)
    # Listing class to scrape (recruitment).
    infoclass = get_cityinfoclass()
    cdzp_url, xiaoshou = getzhaopin(city_info, infoclass)
    # Recruitment category information for the chosen city.
    zp_class_info = complete(cdzp_url, xiaoshou)
    get_city_zpinfo_detail(zp_class_info)


if __name__ == '__main__':
    main()
That is all for this article; I hope it helps you in your studies.