# Crawl and download the image galleries from mzitu.com
import os

import requests
from bs4 import BeautifulSoup
class mzitu():
    """Crawler that walks a gallery index page and downloads every image it links to.

    The crawl is driven by :meth:`all_url`, which visits each gallery link on
    the index page, then each numbered page of that gallery (:meth:`html`),
    then the single image on that page (:meth:`img` / :meth:`save`).

    Args:
        base_dir: Root directory for everything written to disk. Defaults to
            the original hard-coded location ``E:\\Python\\Pythonprogram``.
    """

    # Browser-like header sent with every request so the site treats the
    # crawler like an ordinary browser.
    HEADERS = {'user-agent': "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) chrome/57.0.2987.20.safari/537.36 "}

    # Seconds to wait for the server; without a timeout requests.get() can
    # block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Name of the sub-folder of ``base_dir`` that receives the image files.
    IMAGE_DIR_NAME = "图"

    # Class-level default so the storage root is defined even on instances
    # created without running __init__.
    base_dir = "E:\\Python\\Pythonprogram"

    def __init__(self, base_dir=None):
        if base_dir is not None:
            self.base_dir = base_dir

    def requst(self, url):
        """Fetch *url* with the browser-like headers and return the response."""
        return requests.get(url, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT)

    def all_url(self, url):
        """Crawl the index page at *url* and process every gallery it links to.

        Every ``<a>`` inside the ``<div class="all">`` element is treated as
        one gallery: a folder named after the link text is created and the
        gallery behind the link's ``href`` is downloaded.
        """
        html = self.requst(url)
        soup = BeautifulSoup(html.text, 'lxml')
        all_a = soup.find('div', class_='all').find_all('a')
        print(len(all_a))  # total number of gallery links found
        for a in all_a:
            title = a.get_text()
            print(u' start to save: ', title)
            # "?" is not allowed in Windows file names, so swap it out.
            path = str(title).replace("?", '_')
            self.mkdir(path)
            href = a['href']
            self.html(href)

    def html(self, href):
        """Visit every numbered page of the gallery at *href*.

        The page count is read from the second-to-last ``<span>`` of the
        ``<div class="pagenavi">`` element; page *n* lives at ``href/n``.
        """
        html = self.requst(href)
        soup = BeautifulSoup(html.text, 'lxml')
        max_span = soup.find('div', class_='pagenavi').find_all('span')[-2].get_text()
        for page in range(1, int(max_span) + 1):
            page_url = href + '/' + str(page)
            self.img(page_url)

    def img(self, page_url):
        """Find the image on the gallery page *page_url* and save it."""
        img_html = self.requst(page_url)
        soup = BeautifulSoup(img_html.text, 'lxml')
        img_url = soup.find('div', class_='main-image').find('img')['src']
        self.save(img_url)

    def save(self, img_url):
        """Download *img_url* into the image folder under ``base_dir``.

        The file name is the five characters of the URL that precede its
        four-character suffix (``img_url[-9:-4]``) plus ``.jpg``.
        """
        name = img_url[-9:-4]
        img = self.requst(img_url)
        image_dir = os.path.join(self.base_dir, self.IMAGE_DIR_NAME)
        # Create the target folder on first use instead of failing on open().
        os.makedirs(image_dir, exist_ok=True)
        # NOTE(review): append mode is kept from the original; a second image
        # with the same derived name is appended to the existing file.
        with open(os.path.join(image_dir, name + '.jpg'), 'ab') as f:
            f.write(img.content)

    def mkdir(self, path):
        """Create the folder *path* (whitespace-stripped) under ``base_dir``.

        Returns:
            True if the folder was created (the process also changes its
            working directory into it), False if it already existed.
        """
        path = path.strip()
        target = os.path.join(self.base_dir, path)
        # os.path.exists (not os.path.join) is what actually tests for
        # existence; a joined path string is always truthy.
        if os.path.exists(target):
            print(u' the folder named ', path, u' already exists ')
            return False
        print(u'create a folder named ', path)
        os.makedirs(target)
        os.chdir(target)
        return True
Mzitu = mzitu()  # instantiate the crawler class
Mzitu.all_url('http://www.mzitu.com/all')  # start the crawl from the index page URL