This article shows, by example, how to download pictures from a subscription feed with Python. It is shared here for your reference. The details are as follows:
This code is written for Python 3.4, which differs considerably from Python 2.x.
This is an exercise; the data comes from NetEase subscription feeds.
The code is as follows:
# Module metadata: name of the script's author.
__author__ = ' Saint '
import json
import os
import urllib.error
import urllib.request
from html.parser import HTMLParser
# Extract the image addresses from the content of a fetched web page
class Myhtmlparser(HTMLParser):
    """HTML parser that collects the ``src`` attribute of every ``<img>`` tag.

    After ``feed()`` has been called, ``links`` holds the collected values in
    the order the tags were encountered.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance list. A class-level list would be shared by every
        # parser, so links found on one page would leak into the next one.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` value of an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
        """
        if tag != "img":
            return
        for name, value in attrs:
            # ``value`` is None for a bare ``src`` attribute; skip it because
            # there is nothing to download.
            if name == "src" and value:
                self.links.append(value)
class Down(object):
    """Download the images referenced by a set of dy.163.com subscription feeds.

    For each feed URL in ``collect_links`` a sub-directory of ``img_path`` is
    created, the feed's article list is fetched as JSON, every article page
    is scanned for ``<img>`` tags and each image is written to disk.
    """

    # Root directory; one sub-directory per feed is created beneath it.
    img_path = "E:/saint"
    # Directory the current feed's images are written to (set by isdirexists).
    dir = ""
    # Feed (article list) addresses to collect from.
    collect_links = [
        "http://dy.163.com/v2/media/articlelist/T1374483113516-1",
        "http://dy.163.com/v2/media/articlelist/T1420776257254-1",
        "http://dy.163.com/v2/media/articlelist/T1376641060407-1",
    ]
    # Base address of an individual article page.
    img_links = "http://dy.163.com/v2/article"

    @staticmethod
    def _clear_screen():
        """Clear the terminal with the command that suits the current OS."""
        os.system("cls" if os.name == "nt" else "clear")

    def handlecollect(self):
        """Walk every feed, download its images and prompt the user as it goes.

        After a failed feed the user chooses whether to move on to the next
        feed; after each article page the user chooses whether to keep going
        with the current feed. Answers are case-insensitive.
        """
        for collect_link in self.collect_links:
            print("Start collecting pictures from [" + collect_link + "]")
            # The last URL segment doubles as the download directory name.
            dir_name = collect_link.split("/")[-1]
            self.isdirexists(dir_name)
            articles = self.getlistfromsubscribe(collect_link)
            if articles is False:
                print("Data acquisition failed, continue (y/n)")
                answer = input().strip().lower()
                if answer == "y":
                    self._clear_screen()
                    continue
                if answer == "n":
                    print("Stop Acquisition")
                    break
                self._clear_screen()
                print("illegal input")
                break
            for page in articles:
                # NOTE(review): the "tid"/"DocId" keys are taken as-is from
                # the original script; confirm them against the feed's JSON.
                page_uri = self.img_links + "/" + page["tid"] + "/" + page["DocId"]
                self.getimgfromuri(page_uri)
                print("Continue (y/n)")
                if input().strip().lower() == "n":
                    self._clear_screen()
                    print("Capture complete")
                    # Only leaves the current feed; the next feed still runs.
                    break
        print("OK")

    def getlistfromsubscribe(self, uri):
        """Fetch a feed and return its article list.

        Args:
            uri: Address of the feed's article list.

        Returns:
            The value stored under ``'data'`` in the decoded JSON reply, or
            ``False`` when the request fails, the status is outside 200-300,
            the body cannot be decoded/parsed, or the reply's ``'code'`` is
            not 1.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                if res.code < 200 or res.code > 300:
                    self._clear_screen()
                    return False
                # read() returns bytes on Python 3, so decode before parsing.
                result = res.read().decode("GBK")
        except urllib.error.URLError as err:
            # urlopen raises for unreachable hosts and HTTP error statuses.
            print(err)
            return False
        except ValueError as err:
            # Covers a body that is not valid GBK.
            print(err)
            return False
        try:
            payload = json.loads(result)
        except ValueError as err:
            print(err)
            return False
        if payload['code'] != 1:
            print(payload['msg'])
            return False
        return payload['data']

    def getimgfromuri(self, uri):
        """Download every image referenced by ``<img>`` tags on one page.

        Args:
            uri: Address of the article page to scan.
        """
        with urllib.request.urlopen(uri) as res:
            html_code = res.read().decode("GBK")
        parser = Myhtmlparser()
        parser.feed(html_code)
        parser.close()
        # parser.links is the list of image download addresses.
        for link in parser.links:
            self.writetodisk(link)

    def isdirexists(self, dir_name):
        """Point ``self.dir`` at ``img_path/dir_name``, creating it if needed.

        Args:
            dir_name: Name of the sub-directory for the current feed.

        Returns:
            Always ``True``.
        """
        self.dir = os.path.join(self.img_path, dir_name)
        os.makedirs(self.dir, exist_ok=True)
        return True

    def writetodisk(self, url):
        """Download one file into ``self.dir``.

        The file name is the last path segment of ``url`` with any query
        string removed. The process working directory is left untouched.

        Args:
            url: Address of the file to download.

        Returns:
            ``True`` when the file was written, ``False`` when the URL has no
            usable file name or could not be fetched.
        """
        file_name = url.split("/")[-1].split("?", 1)[0]
        if not file_name:
            return False
        try:
            with urllib.request.urlopen(url) as res:
                data = res.read()
        except (urllib.error.URLError, ValueError) as err:
            # ValueError is raised for addresses urlopen cannot interpret,
            # e.g. relative links taken straight from the page.
            print("Skipping " + url + ": " + str(err))
            return False
        with open(os.path.join(self.dir, file_name), "wb") as out:
            out.write(data)
        return True
# Script entry point: collect the images of every configured feed.
if __name__ == "__main__":
    down = Down()
    # Call the method on the instance, not on the class.
    down.handlecollect()
Hopefully this article will help you with Python programming.