Downloading images from a subscription feed with Python
This example shows how to download images from a subscription feed using Python. It is shared here for your reference. The details are as follows:
This code is written for Python 3.4, which differs from Python 2.x.
This is an exercise; the data source is a NetEase subscription feed. The code is as follows:
__author__ = 'saint'
import json
import os
import urllib.error
import urllib.parse
import urllib.request
from html.parser import HTMLParser
# Filter the image content from the obtained webpage content
class MyHtmlParser(HTMLParser):
    """Collect the ``src`` attribute of every ``<img>`` tag fed to the parser.

    After ``feed()`` has been called, the collected values are available in
    document order in the ``links`` list.
    """

    def __init__(self, **kwargs):
        """Create a parser with its own empty ``links`` list.

        Keyword arguments are forwarded unchanged to ``HTMLParser``.
        """
        super().__init__(**kwargs)
        # Per-instance list: a class-level list would be shared by every
        # parser, so links from earlier pages would leak into later ones.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` value of an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
        """
        if tag != "img":
            return
        for name, value in attrs:
            # A bare ``src`` attribute is reported with value None; there is
            # nothing to download for it, so it is skipped.
            if name == "src" and value:
                self.links.append(value)
class Down(object):
    """Download the images referenced by a set of subscription feeds.

    For every feed URL in ``collect_links`` the article list is fetched as
    JSON, each listed article page is downloaded, and every ``<img>`` found
    on the page is saved into a per-feed directory below ``img_path``.
    """

    # Root directory; one sub-directory per feed is created below it.
    img_path = "E:/saint"
    # Directory the current feed's images are written to (set by isDirExists).
    dir = ''
    # Feed (article list) URLs to collect from.
    collect_links = [
        "http://dy.163.com/v2/media/articlelist/T1374483113516-1",
        "http://dy.163.com/v2/media/articlelist/T1420776257254-1",
        "http://dy.163.com/v2/media/articlelist/T1376641060407-1",
    ]
    # Base URL of an article page; "/<tid>/<docid>" is appended to it.
    img_links = "http://dy.163.com/v2/article"

    @staticmethod
    def _clear_screen():
        """Clear the terminal with the command that fits the current OS."""
        os.system("cls" if os.name == "nt" else "clear")

    @staticmethod
    def _file_name_from_url(url):
        """Return the last path component of *url*, without query/fragment.

        Returns an empty string when the URL path ends in ``/``.
        """
        return os.path.basename(urllib.parse.urlsplit(url).path)

    def handleCollect(self):
        """Walk every feed in ``collect_links`` and download its images.

        The user is prompted on stdin after a failed feed request and after
        each processed article page.
        """
        # NOTE(review): the original indentation was lost; the nesting below
        # (prompt after each page, final "OK" after all feeds) is a
        # reconstruction -- confirm against the intended behavior.
        for collect_link in self.collect_links:
            print("starting from [" + collect_link + "] collecting images")
            # The last URL segment names the download directory of this feed.
            dir_name = collect_link.split("/")[-1]
            self.isDirExists(dir_name)
            pages = self.getListFromSubscribe(collect_link)
            if pages is False:
                print("Data Collection failed, whether to continue (y/n)")
                op = input()
                if op == "y":
                    # Carry on with the next feed.
                    self._clear_screen()
                elif op == "n":
                    print("Stop Collection")
                    break
                else:
                    self._clear_screen()
                    print("illegal input")
                    break
            else:
                for page in pages:
                    page_uri = (self.img_links + "/" + page["tid"]
                                + "/" + page["docid"])
                    self.getImgFromUri(page_uri)
                    print("continue (y/n)")
                    new_op = input()
                    if new_op == "n":
                        self._clear_screen()
                        print("Collected")
                        # Leaves only the page loop; the next feed follows.
                        break
        print("OK")

    def getListFromSubscribe(self, uri):
        """Fetch the article list of one feed.

        Args:
            uri: URL of the feed's article-list endpoint.

        Returns:
            The value stored under ``'data'`` in the JSON reply, or ``False``
            when the request fails, the HTTP status is not 2xx, or the
            reply's ``'code'`` field is not ``1``.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                status = res.getcode()
                raw = res.read()
        except urllib.error.URLError as err:
            # Report the failure through the same False result the caller
            # already handles, instead of aborting the whole run.
            print(err)
            return False
        if status < 200 or status >= 300:
            self._clear_screen()
            return False
        # read() returns bytes on Python 3; the argument to decode() is the
        # page's encoding.
        payload = json.loads(raw.decode("gbk"))
        if payload['code'] != 1:
            print(payload['msg'])
            return False
        return payload['data']

    def getImgFromUri(self, uri):
        """Download one article page and save every image it references.

        Args:
            uri: URL of the article page.
        """
        with urllib.request.urlopen(uri) as res:
            html_code = res.read().decode("gbk")
        parser = MyHtmlParser()
        # Start from an empty per-instance list so links from previously
        # parsed pages are never downloaded again.
        parser.links = []
        parser.feed(html_code)
        parser.close()
        for link in parser.links:
            self.writeToDisk(link)

    def isDirExists(self, dir_name):
        """Select ``<img_path>/<dir_name>`` as download directory, creating it.

        Args:
            dir_name: Name of the sub-directory below ``img_path``.

        Returns:
            Always ``True``.
        """
        # os.path.join supplies the separator that plain concatenation lacked.
        self.dir = os.path.join(self.img_path, dir_name)
        os.makedirs(self.dir, exist_ok=True)
        return True

    def writeToDisk(self, url):
        """Download *url* and store it in the current download directory.

        Args:
            url: Absolute URL of the file to download.

        Returns:
            ``True`` when the file was written, ``False`` when no file name
            could be derived from the URL (nothing is downloaded then).
        """
        file_name = self._file_name_from_url(url)
        if not file_name:
            return False
        with urllib.request.urlopen(url) as res:
            data = res.read()
        # Write by full path rather than os.chdir(): the process working
        # directory stays untouched and a relative img_path keeps working.
        with open(os.path.join(self.dir, file_name), "wb") as out:
            out.write(data)
        return True
if __name__ == "__main__":
    # Script entry point: run the collection when executed directly.
    # A distinct variable name keeps the Down class itself from being
    # rebound to an instance.
    downloader = Down()
    downloader.handleCollect()
I hope this article will help you with Python programming.