Searching Liepin for the keyword "big data" returns at most 100 pages of results, so the crawler walks all 100 pages and collects the relevant listing information for analysis.
__author__ = 'Fred Zhao'

import requests
from bs4 import BeautifulSoup
import os
import csv


class JobSearch():

    def __init__(self):
        # Spoof a desktop browser so Liepin serves the normal HTML page
        self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'}
        # Search-results URL for the keyword "big data" (大数据); the page number is appended in get_detail()
        self.base_url = 'https://www.liepin.com/zhaopin/?ckid=c1a868fa8b83aa5b&fromSearchBtn=2&init=-1&sfrom=click-pc_homepage-centre_searchbox-search_new&degradeflag=0&key=大数据&headckid=c1a868fa8b83aa5b&d_pagesize=40&sitag=lgv-fc5u_67ltfjetf6acg~fa9rxquzc5ikjpxc-ycixw&d_headid=8e7325814e7ed9919787ee3fe85e1c94&d_ckid=8e7325814e7ed9919787ee3fe85e1c94&d_sfrom=search_fp&d_curpage=99&curpage='
        self.base_path = os.path.dirname(__file__)

    def makedir(self, name):
        # Create (or reuse) an output directory and make it the working directory
        path = os.path.join(self.base_path, name)
        is_exist = os.path.exists(path)
        if not is_exist:
            os.makedirs(path)
            print("The directory has been created.")
        else:
            print('OK! The directory already exists. You do not need to create a new one.')
        os.chdir(path)

    def request(self, url):
        # Fetch a page with the browser-like headers
        r = requests.get(url, headers=self.headers)
        return r

    def get_detail(self, page):
        # Parse one results page and extract every job listing on it
        r = self.request(self.base_url + page)
        ul = BeautifulSoup(r.text, 'lxml').find('ul', class_='sojob-list')
        plist = ul.find_all('li')
        self.makedir('job_data')
        rows = []
        for item in plist:
            job_info = item.find('div', class_='sojob-item-main clearfix').find('div', class_='job-info')
            # Job title
            position = job_info.find('h3').get('title')
            print(position)
            job_info_list = job_info.find_all('p')
            # Job requirements summary, stored in the <p> tag's title attribute
            job_condition = job_info_list[0].get('title')
            print(job_condition)
            # Posting date
            job_time = job_info_list[1].find('time').get('title')
            print(job_time)
            company_info = item.find('div', class_='sojob-item-main clearfix').find('div', class_='company-info')
            # Company name
            company = company_info.find('p', class_='company-name').find('a').get('title')
            print(company)
            rows.append([position, job_condition, job_time, company])
        self.save_to_csv(rows)

    def save_to_csv(self, rows):
        # Append this page's rows to the CSV; newline='' avoids blank lines on Windows
        with open('job.csv', 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerows(rows)


if __name__ == '__main__':
    job = JobSearch()
    # The site shows at most 100 result pages (pages 0-99)
    for page in range(0, 100):
        job.get_detail(str(page))
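Once the crawl finishes, job.csv inside the job_data directory holds one row per listing with the four columns written above. As a minimal sketch of the analysis step (the file path and column order come from the script; the Counter-based tally is just one illustrative choice, not part of the original crawler), you could count which companies post the most big-data openings:

import csv
from collections import Counter

# Columns written by the crawler: position, job_condition, job_time, company
with open('job_data/job.csv', newline='', encoding='utf-8') as f:
    rows = [row for row in csv.reader(f) if len(row) == 4]

# Tally postings per company and show the ten most frequent
company_counts = Counter(row[3] for row in rows)
for company, count in company_counts.most_common(10):
    print(company, count)

Because the crawler opens job.csv in append mode, rerunning it adds duplicate rows; deleting the file before a fresh crawl keeps the counts honest.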