# Crawl pictures.
# Target site: http://699pic.com/sousuo-218808-13-1.html
import os

import requests
from bs4 import BeautifulSoup

# Fetch the search-result page; r.content is the raw byte stream of the HTML.
r = requests.get('http://699pic.com/sousuo-218808-13-1.html')
soup = BeautifulSoup(r.content, 'html.parser')  # parse with the built-in HTML parser

# soup.find_all('img') would grab every <img> tag; the lazily-loaded
# thumbnails on this site instead carry class="lazy", so match on that.
tu = soup.find_all(class_='lazy')
for tag in tu:
    # The real image URL lives in the data-original attribute.
    print(tag['data-original'])

# --- Crawl a single picture -------------------------------------------------
url = 'http://img95.699pic.com/photo/50061/5608.jpg_wh300.jpg'
r = requests.get(url)
# Open in binary-write mode; 'with' guarantees the file is closed.
with open('123.jpg', 'wb') as f:
    f.write(r.content)  # write the response byte stream to the file

# --- Bulk write -------------------------------------------------------------
# Build a folder named "tupian" two levels above this script.
curpath = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
tupian = os.path.join(curpath, 'tupian')
if not os.path.exists(tupian):  # create the folder only if it is missing
    os.mkdir(tupian)

# Download every picture found above; name each file after its alt text.
for tag in tu:
    try:
        jpg_url = tag['data-original']  # address of the picture to fetch
        name = tag['alt']
        resp = requests.get(jpg_url)
        # Save the bytes into the "tupian" folder.
        with open(os.path.join(tupian, '%s.jpg' % name), 'wb') as f:
            f.write(resp.content)
    except (KeyError, OSError, requests.RequestException):
        # Best-effort download: skip tags missing attributes, bad filenames,
        # or network failures instead of aborting the whole batch.
        pass
# Python interface automation test 18: crawling pictures using the bs4 framework