#!/usr/bin/env python
#
# -*- coding: utf-8 -*-
#
# author: Jacky
# Scrape news cards from yidianzixun.com channel c6 with Selenium,
# then save [title, source, comment count, link] rows to an .xls file.
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from bs4 import BeautifulSoup
import xlwt

# Number of DOWN-key presses used to trigger lazy loading of more articles.
# NOTE(review): the original value was garbled ('$'); 20 is a guess — tune as needed.
SCROLL_TIMES = 20

FIRST_URL = 'http://www.yidianzixun.com/channel/c6'

driver = webdriver.Firefox()
driver.implicitly_wait(3)  # poll up to 3s for elements to appear
driver.get(FIRST_URL)

# Click the refresh control once, then send DOWN repeatedly so the
# infinite-scroll page loads additional article cards.
driver.find_element_by_class_name('icon-refresh').click()
for _ in range(1, SCROLL_TIMES):
    driver.find_element_by_class_name('icon-refresh').send_keys(Keys.DOWN)

# Parse the fully-loaded page once, outside the scroll loop.
soup = BeautifulSoup(driver.page_source, 'lxml')
print(soup)

# Each matching element is one article card; collect its fields.
articles = []
for article in soup.find_all(class_='item doc style-small-image style-content-middle'):
    title = article.find(class_='doc-title').get_text()
    source = article.find(class_='source').get_text()
    comment = article.find(class_='comment-count').get_text()
    # href on the card is site-relative; prepend the host to get a full URL.
    link = 'http://www.yidianzixun.com' + article.get('href')
    articles.append([title, source, comment, link])
print(articles)

driver.quit()

# Write a header row, then one row per article, starting at row 1.
wbk = xlwt.Workbook(encoding='utf-8')
sheet = wbk.add_sheet('yidianzixun')
sheet.write(0, 0, 'title')
sheet.write(0, 1, 'source')
sheet.write(0, 2, 'comment')
sheet.write(0, 3, 'link')
for i, row in enumerate(articles, start=1):
    sheet.write(i, 0, row[0])
    sheet.write(i, 1, row[1])
    sheet.write(i, 2, row[2])
    sheet.write(i, 3, row[3])
wbk.save(r'zixun\zixun.xls')
# Crawls news from the information site and saves it to Excel.