Ruby + Nokogiri: crawl the latest 10 pages (post text plus picture addresses) and save the results both to a text file and to a database
The code is as follows:
# encoding: utf-8
require "open-uri"
require "nokogiri"
require "mysql"

# Shared state for the rest of the script, held in top-level instance
# variables so that methods defined at the top level can reach it:
#   @dbh  - MySQL connection (host "localhost", user "root", database "Pachong")
#   @file - text dump of everything scraped; mode "w" truncates any earlier run
# NOTE(review): credentials are hard-coded here; consider reading them from ENV.
@dbh = Mysql.real_connect("localhost", "root", "Dengli", "Pachong")
@file = File.open("Qb.txt", "w")
# Scrapes one listing page and records every post found on it.
#
# For each post container on the page, the post text is appended to @file.
# Then, for each thumbnail image inside that post, the image address is
# appended to @file and one row (body, img, time) is inserted into the
# Qiushibaike table through @dbh. Because the insert sits inside the image
# loop, a post without a thumbnail is written to the text file only.
#
# Relies on two instance variables prepared by the surrounding script:
# @file (a writable IO) and @dbh (an open Mysql connection).
#
# @param url [String] absolute URL of the page to fetch
# @return [void]
def getqiubai(url)
  # URI#read (mixed in by open-uri) can only perform a fetch, whereas
  # Kernel#open would run a string starting with "|" as a shell command.
  html = URI.parse(url).read
  doc = Nokogiri::HTML(html)

  # Bound parameters keep quotes in scraped text from breaking (or injecting
  # into) the statement; identifiers use backticks, not string quotes.
  insert = @dbh.prepare("INSERT INTO Qiushibaike (`body`, `img`, `time`) VALUES (?, ?, ?)")
  begin
    # Selector is lower-case: the HTML parser lower-cases tag names and class
    # matching is case-sensitive, so an upper-case selector matches nothing.
    doc.css("div.block.untagged.mb15.bs2").each do |post|
      content = post.css("div.content").text
      @file.puts("#{content}\n")

      post.css("div.thumb img").each do |img|
        imgaddr = img.attr("src").to_s
        @file.puts("#{imgaddr}\n")
        # Time is stored in its default string form, as the interpolated SQL did.
        insert.execute(content, imgaddr, Time.now.to_s)
      end
    end
  ensure
    insert.close
  end
end
# Crawl listing pages 1 through 10, writing a page marker line to the text
# dump before each page's content.
begin
  (1..10).each do |i|
    @file.puts("This is #{i} page")
    # URI schemes are case-insensitive, so the capitalised scheme still resolves to HTTP.
    url = "Http://www.qiushibaike.com/8hr/page/#{i}"
    getqiubai(url)
  end
ensure
  # Flush the text dump and release the DB connection even if a page fails.
  @file.close if @file
  @dbh.close if @dbh
end