爬取獵聘大資料崗位相關資訊--Python

來源:互聯網
上載者:User

標籤:soj   分析   arch   iter   amp   hbox   init   bee   相關   

獵聘網站搜尋大資料關鍵字,只能顯示100頁,爬取這一百頁的相關資訊,以便做分析。

__author__ = 'Fred Zhao'

import csv
import os

import requests
from bs4 import BeautifulSoup


class JobSearch():
    """Scrape 'big data' (大資料) job listings from liepin.com.

    The site caps search results at 100 pages; the `__main__` driver walks
    pages 0-99 and appends one CSV row per listing (position, conditions,
    posting time, company) to job_data/job.csv.
    """

    def __init__(self):
        # Browser-like User-Agent so the site serves the normal HTML page.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'}
        # Search URL with every query parameter pre-baked except the page
        # number, which get_detail() appends to `curPage=`.
        self.base_url = 'https://www.liepin.com/zhaopin/?ckid=c1a868fa8b83aa5b&fromSearchBtn=2&init=-1&sfrom=click-pc_homepage-centre_searchbox-search_new&degradeFlag=0&key=大資料&headckid=c1a868fa8b83aa5b&d_pageSize=40&siTag=LGV-fc5u_67LtFjetF6ACg~fA9rXquZc5IkJpXC-Ycixw&d_headId=8e7325814e7ed9919787ee3fe85e1c94&d_ckId=8e7325814e7ed9919787ee3fe85e1c94&d_sfrom=search_fp&d_curPage=99&curPage='
        self.base_path = os.path.dirname(__file__)

    def makedir(self, name):
        """Ensure directory `name` exists under base_path, then chdir into it.

        NOTE(review): chdir as a side effect makes the relative 'job.csv'
        path in save_to_csv() land inside this directory.
        """
        path = os.path.join(self.base_path, name)
        if not os.path.exists(path):
            os.makedirs(path)
            print("Directory has been created.")
        else:
            print('OK! The directory already exists; no need to create it.')
        os.chdir(path)

    def request(self, url):
        """GET `url` with the browser-like headers and return the Response."""
        r = requests.get(url, headers=self.headers)
        return r

    def get_detail(self, page):
        """Fetch one result page (page number as a string) and save its rows.

        Parses the <ul class="sojob-list"> listing container; each <li>
        yields [position, job_condition, job_time, company].
        """
        r = self.request(self.base_url + page)
        ul = BeautifulSoup(r.text, 'lxml').find('ul', class_='sojob-list')
        plist = ul.find_all('li')
        self.makedir('job_data')
        rows = []
        for item in plist:
            job_info = item.find('div', class_='sojob-item-main clearfix').find('div', class_='job-info')
            position = job_info.find('h3').get('title')
            print(position)
            job_info_list = job_info.find_all('p')
            # First <p> holds salary/location/degree/experience summary.
            job_condition = job_info_list[0].get('title')
            print(job_condition)
            job_time = job_info_list[1].find('time').get('title')
            print(job_time)
            company_info = item.find('div', class_='sojob-item-main clearfix').find('div', class_='company-info')
            company = company_info.find('p', class_='company-name').find('a').get('title')
            print(company)
            rows.append([position, job_condition, job_time, company])
        self.save_to_csv(rows)

    def save_to_csv(self, rows):
        """Append `rows` to job.csv in the current directory.

        newline='' is required by the csv module (avoids blank lines on
        Windows); utf-8 keeps the Chinese fields intact.
        """
        with open('job.csv', 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerows(rows)


if __name__ == '__main__':
    job = JobSearch()
    # The site only exposes 100 result pages for this search.
    for page in range(0, 100):
        job.get_detail(str(page))

 

爬取獵聘大資料崗位相關資訊--Python

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.