#!/usr/bin/env python
# -*- coding: cp936 -*-
import re  # regular expressions, used to locate image URLs in a page
import urllib  # page fetching and file download
def gethtml(URL):
    """Fetch the resource at *URL* and return its raw content.

    Args:
        URL: Address of the page to fetch, including its scheme.

    Returns:
        The undecoded body exactly as read from the response (``bytes`` on
        Python 3, ``str`` on Python 2).

    Raises:
        IOError: If the URL cannot be opened (``urllib.error.URLError`` is a
            subclass of it on Python 3).
    """
    # urlopen lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(URL)
    # The Python 2 response object is not a context manager, so close it
    # explicitly instead of using ``with``.
    try:
        return page.read()
    finally:
        page.close()
def getImage(HTML):
    """Download every ``.jpg`` image referenced in *HTML*.

    An image is recognised by the literal pattern ``src="<url>.jpg" width``;
    the captured ``<url>.jpg`` is passed to ``urlretrieve`` unchanged, so
    relative addresses are not resolved here. Files are written to the
    current working directory as ``picture_0.jpg``, ``picture_1.jpg``, ...
    in the order the images appear in the page.

    Args:
        HTML: Page source, either text or undecoded bytes.

    Returns:
        None.
    """
    # urlretrieve lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # A text pattern cannot be applied to bytes on Python 3. The URLs being
    # extracted are ASCII, so undecodable bytes elsewhere in the page are
    # replaced rather than treated as an error.
    if isinstance(HTML, bytes):
        HTML = HTML.decode("utf-8", "replace")

    image_urls = re.findall(r'src="(.*?\.jpg)" width', HTML)
    for index, image_url in enumerate(image_urls):
        urlretrieve(image_url, 'picture_%s.jpg' % index)
def main():
    """Prompt for a page URL and download the ``.jpg`` images it references.

    ``http://`` is prepended when the entered address has no ``http://`` or
    ``https://`` scheme. The page is then fetched with ``gethtml`` and handed
    to ``getImage``.

    Raises:
        SystemExit: If nothing was entered at the prompt.
    """
    # raw_input only exists on Python 2; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    uri = read_line("Please enter URL:").strip()
    if not uri:
        raise SystemExit("No URL entered.")

    # Only add a scheme when the user left it out; an https:// address must
    # be passed through unchanged.
    if not uri.lower().startswith(("http://", "https://")):
        uri = "http://" + uri

    getImage(gethtml(uri))


if __name__ == "__main__":
    main()
# To run the script: python test.py
# Writing a web crawler in Python