Use Python 3 to write scripts that download a webpage, or only the images on a webpage.
The most basic implementation for fetching the content of a webpage:
#!/usr/bin/env python3
from urllib.request import urlretrieve


def firstNonBlank(lines):
    """Return the first element of *lines* that is not blank.

    A line is blank when it is empty or contains only whitespace.
    *lines* may be any iterable of strings. Returns ``None`` when every
    line is blank or the iterable is empty.
    """
    for eachLine in lines:
        if eachLine.strip():
            return eachLine
    return None


def firstLast(webpage):
    """Print the first and last non-blank lines of the file *webpage*.

    *webpage* is the path of a local file (for example the temporary
    file produced by ``urlretrieve``). Lines are printed with their own
    line endings; nothing is printed when the file has no non-blank line.
    """
    # The page encoding is unknown here, so decode leniently instead of
    # failing on the first byte that is not valid UTF-8.
    with open(webpage, encoding="utf-8", errors="replace") as f:
        lines = f.readlines()

    # reversed() finds the last non-blank line without mutating `lines`.
    for line in (firstNonBlank(lines), firstNonBlank(reversed(lines))):
        if line is not None:
            print(line, end="")


def download(url='http://www', process=firstLast):
    """Download *url* to a temporary file and hand its path to *process*.

    *process* is called with the local file name only when the download
    succeeds; network and I/O failures are swallowed and nothing happens.
    """
    try:
        retval = urlretrieve(url)[0]
    except OSError:
        # URLError and HTTPError are OSError subclasses in Python 3.
        retval = None
    if retval:
        process(retval)


if __name__ == '__main__':
    download()
Using the urllib module to download the images found on a webpage:
import urllib.request
import socket
import re
import sys
import os

targetDir = r"C:\Users\elqstux\Desktop\pic"


def destFile(path):
    """Return the local file path under ``targetDir`` for the URL *path*.

    The file name is everything after the last '/' in *path*. The target
    directory (including missing parents) is created on first use.
    """
    os.makedirs(targetDir, exist_ok=True)
    return os.path.join(targetDir, path.rsplit('/', 1)[-1])


def findImageLinks(html):
    """Return the distinct jpg/png/gif URLs found in the text *html*.

    Both http and https links are recognised. A link ends at a real
    ``.jpg`` / ``.png`` / ``.gif`` extension, so a host such as
    ``x.gifts.com`` does not cut the match short. Links are returned in
    the order they first appear, without duplicates.
    """
    found = re.findall(
        r'''https?:[^\s"'<>]*?\.(?:jpg|png|gif)\b''', html, re.IGNORECASE
    )
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(found))


if __name__ == "__main__":
    hostname = "http://www.douban.com"
    req = urllib.request.Request(hostname)
    with urllib.request.urlopen(req) as webpage:
        contentBytes = webpage.read()
        charset = webpage.headers.get_content_charset() or "utf-8"

    # Decode the body rather than calling str() on it: str(bytes) yields
    # the b'...' repr, in which newlines become the two characters "\n"
    # and no longer separate adjacent URLs.
    content = contentBytes.decode(charset, errors="replace")

    for link in findImageLinks(content):
        print(link)
        try:
            urllib.request.urlretrieve(link, destFile(link))
        except OSError as err:
            # One broken image should not abort the remaining downloads.
            print("failed to download %s: %s" % (link, err))
import urllib.request
import socket
import re
import sys
import os

targetDir = r"H:\pic"


def destFile(path):
    """Return the local file path under ``targetDir`` for the URL *path*.

    The file name is everything after the last '/' in *path*. The target
    directory is created if it does not exist yet.
    """
    if not os.path.isdir(targetDir):
        os.mkdir(targetDir)
    pos = path.rindex('/')
    t = os.path.join(targetDir, path[pos + 1:])
    return t


def findImageMatches(html):
    """Return ``re.findall`` results for image links in the text *html*.

    The pattern contains two pairs of parentheses, so it has two groups.
    ``re.findall`` therefore returns a list of 2-tuples: the content
    matched by the outer group (the whole link) followed by the content
    matched by the inner group (the extension).
    """
    return re.findall(r'(http:[^\s]*?(jpg|png|gif))', html)


if __name__ == "__main__":
    hostname = "http://www.douban.com/"
    req = urllib.request.Request(hostname)
    with urllib.request.urlopen(req) as webpage:
        contentBytes = webpage.read()
        charset = webpage.headers.get_content_charset() or "utf-8"

    # Decode the body instead of using str(contentBytes), which would
    # search the b'...' repr of the bytes rather than the page text.
    match = findImageMatches(contentBytes.decode(charset, errors="replace"))

    # Each list element holds the matched content of both groups.
    for picname, picType in match:
        print(picname)
        print(picType)

'''
output:
http://img3.douban.com/pics/blank.gif
gif
http://img3.douban.com/icon/g111328-1.jpg
jpg
http://img3.douban.com/pics/blank.gif
gif
http://img3.douban.com/icon/g197523-19.jpg
jpg
...
'''