Analysis: Since you want to download all of the pictures, there is no need to filter them, which keeps things much simpler. The image URLs generally appear right after the src= attribute in the HTML.
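For instance, a minimal sketch of pulling the src values out of a page with re (the sample HTML string here is made up for illustration):

import re
sample_html = '<img src="http://example.com/a.jpg"><img src="http://example.com/b.png">'  # hypothetical snippet
print(re.findall(r'src="(.*?)"', sample_html))
# -> ['http://example.com/a.jpg', 'http://example.com/b.png']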
Code:
# -*- coding: utf-8 -*-
__author__ = 'Bohn'
import requests, re, os
from urllib.request import urlretrieve


def gethtml(url):
    # Disguise the request with a browser User-Agent header
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36')
    headers = {'User-Agent': user_agent}
    response = requests.get(url, headers=headers)
    response.encoding = 'utf-8'
    html = response.text
    print("Got html")
    return html


def getimg(html):
    # Regular expression: grab whatever sits inside src="..."
    regex = r'src="(.*?)"'
    pattern = re.compile(regex, re.S)
    # Find all image URLs matched by the regex
    imglist = re.findall(pattern, html)
    print(imglist)
    x = 1
    # Create the target folder if it does not exist yet
    if not os.path.exists(r'D:\imgsaving'):
        os.makedirs(r'D:\imgsaving')
    # Download the images one by one
    for img in imglist:
        print("Downloading picture %s ..." % x)
        urlretrieve(img, r'D:\imgsaving\%s.jpg' % x)
        x = x + 1
    return


try:
    # Crawl the pictures on the icourses homepage
    url = r"http://www.icourses.cn/home/"
    html = gethtml(url)
    getimg(html)
    print("OK")
except Exception:
    print('Failed.')