# -*- coding: utf-8 -*-
# Author metadata for this module.
__author__ = ' Administrator '
import os
import re

try:  # Python 2 module layout
    import urllib
    import urllib2
except ImportError:  # Python 3: both APIs live in urllib.request
    import urllib.request as urllib
    import urllib.request as urllib2
def Url1(URL):  # multiple files
    """Download a page and return the markup of its first "msgfont" div.

    The page is requested with a browser-like ``User-Agent`` header, the
    first ``<div class="msgfont">...</div>`` element (the "first floor" of
    the page) is located, and that element is returned with its enclosing
    tags included.

    Args:
        URL: Address of the page to download.

    Returns:
        The matched ``<div>`` element decoded from UTF-8 to text.

    Raises:
        ValueError: If the page contains no matching ``<div>``.
    """
    opener = urllib2.build_opener()
    # Request headers are configured through ``addheaders``. Per the original
    # author's note, omitting the header yields a 403 error and garbled text.
    opener.addheaders = [('User-Agent', 'Mozilla/5.0')]

    response = opener.open(URL)
    try:
        raw_page = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    # Match on the raw bytes and decode only the extracted fragment.
    # The class name is matched case-insensitively and DOTALL lets the
    # element span several lines; the non-greedy body stops at the first
    # closing ``</div>``.
    found = re.search(
        br'<div\s+class\s*=\s*"msgfont"\s*>(.*?)</div>',
        raw_page,
        re.IGNORECASE | re.DOTALL,
    )
    if found is None:
        raise ValueError('no <div class="msgfont"> element found in %r' % (URL,))

    # The page is served as UTF-8, so decode once here to avoid garbled text.
    return found.group().decode('utf-8')
def getimg(URL, save_dir=r'G:\pic'):
    """Download every image referenced in the core markup of a page.

    The core markup is obtained from ``Url1(URL)``. Each image address found
    in it is downloaded into ``save_dir`` and named by its 1-based position:
    ``1.jpg``, ``2.jpg``, and so on. The saved path is printed after each
    download.

    Args:
        URL: Address of the page whose images should be downloaded.
        save_dir: Directory that receives the images. Created if it does
            not exist. Defaults to the original hard-coded ``G:\\pic``.
    """
    # Find the image addresses in the core markup.
    page_html = Url1(URL)

    # NOTE(review): the image pattern in the original source was blank, so
    # this is a reconstruction that captures the ``src`` attribute of every
    # ``<img>`` tag. Confirm it against the target page's markup.
    image_urls = re.findall(
        r'<img[^>]*?\ssrc\s*=\s*["\']([^"\']+)["\']',
        page_html,
        re.IGNORECASE,
    )

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    for number, image_url in enumerate(image_urls, 1):
        target = os.path.join(save_dir, '%d.jpg' % number)
        urllib.urlretrieve(image_url, target)
        print(target + ' OK')
# Script entry: download the images from this blog post when the file runs.
URL = 'http://wangwei007.blog.51cto.com/68019/1351429'
getimg(URL)
# Python: downloading multiple files