import urllib.request   # connect to the network / issue HTTP requests
import urllib.parse     # encode crawler POST parameters
import json             # JSON parsing of results (kept from original; not used below)
import random           # random numbers for unique image file names
import re               # regular expressions for extracting image links

# Crawler entry-point link. NOTE(review): the original listing's URL literal was
# truncated by scraping — fill in the real target URL before running.
url = ""

# POST parameters, as a dict; urlencode + encode('utf-8') turns them into the
# bytes body urllib.request expects.
data = {}
data = urllib.parse.urlencode(data).encode('utf-8')

# Build the request (passing a data body makes this a POST) and add a header
# so the server sees a client identity instead of the default Python UA.
req = urllib.request.Request(url, data)
req.add_header('user-agent', 'fake-client')

# Fetch the response and decode the HTML page text.
res = urllib.request.urlopen(req)
html = res.read().decode('utf-8')

# If the regular expression contains a group (parentheses), findall returns
# only the group's content; with multiple groups it returns a list of tuples.
# NOTE(review): the pattern below is an empty placeholder in the original —
# supply a real pattern matching image URLs on the target page.
req = re.compile(r'')
urlsimg = re.findall(req, html)  # all image links found in the HTML

for urlimg in urlsimg:
    # Fetch each image link; read() yields the raw binary payload.
    response = urllib.request.urlopen(urlimg)
    img = response.read()
    # BUG FIX: the original called math.random(), which does not exist in
    # Python (that is JavaScript); random.random() is the intended stdlib call.
    with open('img_%f.jpg' % (random.random()), 'wb') as f:
        f.write(img)  # write the binary data out as a .jpg file
Getting Started with Python crawlers