# Python script that crawls the pictures of a website and saves them into a specified folder.
# The code is as follows:
# -*- coding: utf-8 -*-
import contextlib
import os
import re
import socket
import urllib
import urllib2
def docment():
    """Prompt for a folder suffix and return the download folder path.

    Reads one line from standard input, appends it to the fixed prefix
    ``E:\\Python\\map`` and creates that directory if it does not exist.

    Returns:
        The path of the (now existing) target directory.

    Raises:
        OSError: if the directory cannot be created and does not already
            exist as a directory.
    """
    print(u'Create a folder E:\\Python\\map<suffix> '
          u'(please enter a number or letter)')
    h = raw_input()
    path = u'E:\\Python\\map' + str(h)
    try:
        os.makedirs(path)
    except OSError:
        # An already-existing directory is fine; anything else (permissions,
        # a plain file in the way, a missing drive) is a real failure.
        if not os.path.isdir(path):
            raise
    return path
def getallurl(html):
    """Return the targets of all single-quoted ``.htm`` links in *html*.

    A link is recognised by the literal text ``a href='...htm'``; only the
    part between the quotes is returned, in document order.

    NOTE: this file defines ``getallurl`` a second time further down; that
    later definition is the one bound when the top-level code runs.

    Args:
        html: page source to scan.

    Returns:
        A list of link targets (empty when nothing matches).
    """
    # [^']* keeps a match inside one attribute value, so a non-.htm link
    # cannot be glued onto the next .htm link that follows it.
    link_re = re.compile(r"a href='([^']*\.htm)'")
    return link_re.findall(html)
def gethtml(url, retries=3):
    """Download *url* and return the response body.

    The request is sent with a browser-like ``User-Agent`` header and a
    20 second timeout.

    Args:
        url: address of the page to fetch.
        retries: how many additional attempts to make after a socket
            timeout before giving up.

    Returns:
        The raw response body, or the sentinel string ``'cuowu'`` when the
        request fails with ``urllib2.URLError`` or keeps timing out.
    """
    req_header = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; '
                      'rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
    }
    req_timeout = 20
    failure = 'cuowu'

    req = urllib2.Request(url, None, req_header)
    for _attempt in range(retries + 1):
        try:
            # closing() guarantees the connection is released even when
            # read() raises part-way through.
            with contextlib.closing(
                    urllib2.urlopen(req, None, req_timeout)) as resp:
                return resp.read()
        except urllib2.URLError as e:
            # Not a transient condition: report it and give up.
            print(e)
            return failure
        except socket.timeout:
            # Transient: try again until the retry budget is used up.
            continue
    return failure
def getimg(html, path):
    """Download every ``img_show`` JPEG referenced in *html* into *path*.

    Images are recognised by the literal markup
    ``img class=img_show border=0 src=<url>.jpg``. Each one is fetched and
    written to *path* under the last path component of its URL.

    Args:
        html: page source to scan.
        path: existing directory that receives the image files.

    Returns:
        ``0`` when the page contains no matching image, otherwise ``None``.
    """
    imglist = re.findall(r'img class=img_show border=0 src=(.*?\.jpg)', html)
    if not imglist:
        return 0

    for imgurl in imglist:
        print(imgurl)
        # Use the whole file name from the URL; a fixed-width slice can cut
        # long names short or drag a '/' into the file name.
        filename = imgurl.rsplit('/', 1)[-1]
        try:
            with contextlib.closing(urllib2.urlopen(imgurl, None, 20)) as resp:
                content = resp.read()
        except (urllib2.URLError, socket.timeout) as e:
            # One broken image must not abort the rest of the page.
            print(e)
            continue
        with open(os.path.join(path, filename), 'wb') as out:
            out.write(content)
def getallurl(html):
    """Return the targets of all single-quoted ``.htm`` links in *html*.

    A link is recognised by the literal text ``a href='...htm'``; only the
    part between the quotes is returned, in document order.

    NOTE: this repeats an earlier definition of ``getallurl`` in this file;
    being the later one, it is the definition in effect when the top-level
    code runs.

    Args:
        html: page source to scan.

    Returns:
        A list of link targets (empty when nothing matches).
    """
    # [^']* keeps a match inside one attribute value, so a non-.htm link
    # cannot be glued onto the next .htm link that follows it.
    link_re = re.compile(r"a href='([^']*\.htm)'")
    return link_re.findall(html)
def main():
    """Run the interactive crawl.

    Asks for a start URL, downloads the images on that page into the folder
    chosen through ``docment()``, then does the same for every ``.htm`` link
    found on the start page. Links are resolved against a fixed base URL.
    """
    base_url = r'http://www.umei.cc/p/gaoqing/rihan/'
    page = 1

    print(u'Please enter URL: ')
    start_url = raw_input()
    print(u'start download')
    print(u'page ' + str(page))

    html = gethtml(start_url)
    alllist = getallurl(html)
    path = docment()
    getimg(html, path)

    # One pass over the links is enough; each link is a complete relative
    # page name, so it is appended to the base URL as a whole.
    for link in alllist:
        page += 1
        # Announce the page before fetching it so the banner matches the
        # image URLs printed underneath.
        print(u'page ' + str(page))
        getimg(gethtml(base_url + link), path)

    print(u'download completed')


if __name__ == '__main__':
    main()