import os
import re
import urllib
import urllib2
def ZZ(URL, first_page=5, last_page=5365):
    """Fetch numbered pages and download each page's image and archive.

    For every page number ``i`` from ``first_page`` to ``last_page``
    (inclusive) the page at ``URL + str(i) + '.shtml'`` is fetched.  The
    image URL and title are extracted from the HTML, a directory named
    ``<title>_<i>`` is created under the current working directory, and the
    image (``<i>.jpg``) and the linked download (``<i>.rar``) are saved
    into it.

    Any exception raised while handling one page is printed and the loop
    moves on to the next page, so a single bad page does not stop the run.

    Args:
        URL: URL prefix; the page number and ``.shtml`` are appended to it.
        first_page: First page number to fetch (defaults to the originally
            hard-coded 5).
        last_page: Last page number to fetch, inclusive (defaults to the
            originally hard-coded 5365).
    """
    base_dir = os.getcwd()

    # Image URL and title section.
    # NOTE(review): the tail of this pattern was lost in the source (it had
    # no capture groups although the code reads two per match: image URL and
    # title).  The <img src=... alt=...> part is reconstructed -- confirm it
    # against the real page markup.  Matching is case-insensitive because the
    # attribute casing in the source is unreliable.
    image_title_re = re.compile(
        r'<div class="large-imgs">\r\n\s*<img src="([^"]+)" alt="([^"]+)"',
        re.I)

    # Download link.
    # NOTE(review): the source pattern also matched the button's title
    # attribute, icon markup and link text; those looked machine-translated,
    # so only the href/target/class prefix is matched here -- confirm.
    down_re = re.compile(
        r'<a href="([^"]+)" target="_blank" class="button btn-down"',
        re.I)

    for i in range(first_page, last_page + 1):
        try:
            response = urllib2.urlopen(URL + str(i) + '.shtml')
            try:
                html = response.read()
            finally:
                # Release the connection even if the read fails.
                response.close()
            print(u' crawl ' + str(i) + u' page data ')

            # Extract image and title.  Flags belong in re.compile(): the
            # second argument of a compiled pattern's findall() is a start
            # position, so passing re.S there would skip the first bytes.
            found = image_title_re.findall(html)
            image_url = found[0][0].decode('utf-8')
            title = found[0][1].decode('utf-8')

            path = os.path.join(base_dir, title + '_' + str(i))
            if not os.path.isdir(path):
                os.mkdir(path)

            # NOTE(review): u'anon' is the sentinel as it appears in the
            # source; it may be a translated placeholder -- confirm.
            if image_url != u'anon':
                urllib.urlretrieve('http://www.cssmoban.com' + image_url,
                                   os.path.join(path, str(i) + '.jpg'))

            # Extract the downloadable file.
            found = down_re.findall(html)
            urllib.urlretrieve(found[0],
                               os.path.join(path, str(i) + '.rar'))
            print(u' crawl ' + str(i) + u' page data complete ')
        except Exception as err:
            # Best-effort per page: report and continue with the next one.
            # A byte-string format is used so that non-ASCII error text is
            # not implicitly decoded.
            print('%s %s' % (err, ' failed '))
# Ask for the URL prefix on the console and start the crawl.  The value is
# passed through the same variable it was read into (the original read into
# one name and passed a differently-cased one).
url = raw_input(' URL: ')
ZZ(url)
# Python crawler example (shared by a user for learning purposes)