This article describes how to download pictures from a subscription feed with Python. It is shared here for reference. The details are as follows:
This code is written for Python 3.4 and differs considerably from Python 2.x.
This is an exercise where the data source comes from the NetEase subscription. The code is as follows:
The code is as follows:
__author__ = ' Saint '
import json
import os
import urllib.error
import urllib.request
from html.parser import HTMLParser
# Filter the contents of a picture from the content of the page you get
class Myhtmlparser(HTMLParser):
    """HTML parser that collects the ``src`` value of every ``<img>`` tag.

    After ``feed()`` has been called, the collected addresses are available
    in document order in the ``links`` attribute of the instance.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Kept per instance: a class-level list would be shared by every
        # parser, so links found on earlier pages would leak into later ones.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` attribute of an ``<img>`` start tag.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
        """
        if tag != "img":
            return
        for name, value in attrs:
            # ``value`` is None for a bare ``src`` attribute; there is
            # nothing to download in that case, so skip it.
            if name == "src" and value:
                self.links.append(value)
class Down(object):
    """Download the pictures referenced by a set of subscription feeds.

    For every address in ``collect_links`` the feed's article list is fetched
    as JSON, each listed article page is fetched and scanned for ``<img>``
    tags, and every image found is written below ``img_path``.
    """

    # Root directory; one sub-directory per feed is created below it.
    img_path = "E:/saint"
    # Directory the current feed's files are written to (set by isdirexists).
    dir = ''
    # Feed (article list) addresses to collect from.
    collect_links = [
        "http://dy.163.com/v2/media/articlelist/T1374483113516-1",
        "http://dy.163.com/v2/media/articlelist/T1420776257254-1",
        "http://dy.163.com/v2/media/articlelist/T1376641060407-1",
    ]
    # Base address of a single article page.
    img_links = "http://dy.163.com/v2/article"

    @staticmethod
    def _clear_screen():
        """Clear the console using the command of the current platform."""
        os.system("cls" if os.name == "nt" else "clear")

    def handlecollect(self):
        """Walk all feeds in ``collect_links`` and download their pictures.

        The user is prompted on stdin when a feed cannot be read and after
        each article page; answering ``n`` stops the current loop.
        """
        for collect_link in self.collect_links:
            print("Start collecting pictures from [" + collect_link + "]")
            # One download directory per feed, named after the last URL segment.
            self.isdirexists(collect_link.split("/")[-1])
            pages = self.getlistfromsubscribe(collect_link)
            if pages is False:
                print("Data acquisition failed, continue (y/n)")
                answer = input().strip().lower()
                if answer == "y":
                    self._clear_screen()
                    continue
                if answer == "n":
                    print("Stop Collection")
                else:
                    self._clear_screen()
                    print("illegal input")
                break
            for page in pages:
                # NOTE(review): the article id key is assumed to be "docid";
                # confirm against a real feed response.
                page_uri = (
                    self.img_links + "/" + page["tid"] + "/" + page["docid"]
                )
                self.getimgfromuri(page_uri)
                # NOTE(review): the prompt is issued after every page and
                # "n" only leaves the current feed; confirm this is intended.
                print("Continue (y/n)")
                if input().strip().lower() == "n":
                    self._clear_screen()
                    print("collection Complete")
                    break
        print("OK")

    def getlistfromsubscribe(self, uri):
        """Fetch a feed and return its list of article entries.

        Args:
            uri: Address of the feed's JSON article list.

        Returns:
            The value stored under ``'data'`` in the response, or ``False``
            when the request fails, the HTTP status is outside 200-300, the
            body is not valid GBK-encoded JSON, or ``'code'`` is not 1.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                status = res.code
                raw = res.read()
        except urllib.error.URLError as err:
            # urlopen raises for unreachable hosts and HTTP error statuses.
            print(err)
            return False
        # ``status`` is None for non-HTTP handlers; only HTTP responses are
        # range-checked.  NOTE(review): lower bound of 200 is assumed.
        if status is not None and not 200 <= status <= 300:
            self._clear_screen()
            return False
        try:
            # read() returns bytes in Python 3; the pages are GBK encoded.
            payload = json.loads(raw.decode("gbk"))
        except ValueError as err:
            print(err)
            return False
        if payload['code'] != 1:
            print(payload['msg'])
            return False
        return payload['data']

    def getimgfromuri(self, uri):
        """Fetch one article page and download every image it references.

        Args:
            uri: Address of the article page.
        """
        with urllib.request.urlopen(uri) as res:
            html_code = res.read().decode("gbk")
        hp = Myhtmlparser()
        hp.feed(html_code)
        hp.close()
        # hp.links holds the download addresses of the pictures on the page.
        for link in hp.links:
            self.writetodisk(link)

    def isdirexists(self, dir_name):
        """Select ``img_path/dir_name`` as download directory, creating it.

        Args:
            dir_name: Name of the sub-directory below ``img_path``.

        Returns:
            Always ``True``.
        """
        self.dir = os.path.join(self.img_path, dir_name)
        os.makedirs(self.dir, exist_ok=True)
        return True

    def writetodisk(self, url):
        """Download ``url`` and store it in the current download directory.

        The file is named after the last ``/``-separated segment of ``url``.
        The target is addressed by full path, so the working directory of the
        process is left untouched.

        Args:
            url: Address of the file to download.

        Returns:
            Always ``True``.
        """
        file_name = url.split("/")[-1]
        with urllib.request.urlopen(url) as res:
            data = res.read()
        with open(os.path.join(self.dir, file_name), "wb") as out:
            out.write(data)
        return True


# Lower-case alias so that both spellings of the class name resolve.
down = Down
# Script entry point: run the collector only when executed directly.
if __name__ == "__main__":
    collector = Down()
    # handlecollect is an instance method, so call it on the instance.
    collector.handlecollect()
I hope this article will help you with your Python programming.