Environment: Python 2.7.6 with BeautifulSoup 4 (the bs4 module). The script can be run from PowerShell or the command prompt; make sure the bs4 module is installed first.
The code is as follows:
# -*- coding: utf-8 -*-
# 2013.12.36 19:41 wnlo-c209
# Download the pictures from dbmeizi.com.
import os
import sys
import urllib2

from bs4 import BeautifulSoup
# Create the output folder for the downloaded pictures.
# NOTE: os.getcwd() is the current working directory, which is not
# necessarily the directory that contains this script.
path = os.getcwd()
new_path = os.path.join(path, u'watercress sister')
if not os.path.isdir(new_path):
    os.mkdir(new_path)
def page_loop(page=0):
    """Download every image from dbmeizi.com, page by page, starting at *page*.

    Each listing page ``http://www.dbmeizi.com/?p=<page>`` is fetched, and the
    ``src`` of every ``<img>`` tag on it is downloaded into the
    ``watercress sister`` folder (relative to the current working directory).
    Each file is named after the last 11 characters of its image URL.

    Args:
        page: Number of the first listing page to fetch (an int, or a value
            that ``int()`` accepts).

    This function does not return normally: when a listing page contains no
    ``<img>`` tags it prints a completion message and ends the process with
    ``sys.exit(0)``.
    """
    base_url = 'http://www.dbmeizi.com/'
    save_dir = u'watercress sister'

    # Make sure the target folder exists so the function also works when it
    # is called before anything else has created the folder.
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    # Loop instead of recursing once per page, so a long crawl cannot run
    # into the interpreter's recursion limit.
    while True:
        url = 'http://www.dbmeizi.com/?p=%s' % page
        response = urllib2.urlopen(url)
        try:
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            soup = BeautifulSoup(response, 'html.parser')
        finally:
            response.close()
        my_girl = soup.find_all('img')

        # End detection: a page without any image marks the end of the site.
        if not my_girl:
            print(u'has been completely crawled')
            sys.exit(0)

        print(u'start crawl')
        for girl in my_girl:
            link = girl.get('src')
            if not link:
                # An <img> without a src has nothing to download.
                continue
            flink = base_url + link
            print(flink)
            image = urllib2.urlopen(flink)
            try:
                content2 = image.read()
            finally:
                image.close()
            # The file name is the tail of the URL (last 11 characters).
            with open(save_dir + '/' + flink[-11:], 'wb') as code:
                code.write(content2)

        page = int(page) + 1
        print(u'start crawling the next page')
        print('the %s page' % page)
# Start crawling from the first listing page.
page_loop()