Reproduced
By default, Scrapy sends every request with the same single User-Agent, which makes the crawler easy for a site to block. The code below picks a User-Agent at random from a predefined list for each request, so that different pages are fetched under different User-Agents.
Add the following code to settings.py:
# Downloader middleware configuration for settings.py.
# Scrapy only reads this setting under its upper-case name.
DOWNLOADER_MIDDLEWARES = {
    # Mapping the built-in user-agent middleware to None disables it, so it
    # does not set the User-Agent header before the rotating middleware runs.
    # NOTE(review): newer Scrapy releases name this module
    # scrapy.downloadermiddlewares.useragent -- confirm against the installed version.
    'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,
    # Enable the rotating middleware. The priority value was missing from the
    # original text; 400 is assumed here -- TODO confirm the desired ordering.
    'Crawler.comm.rotate_useragent.RotateUserAgentMiddleware': 400,
}
Note: Crawler is the name of your project. The rest of the dotted path (comm.rotate_useragent) is the directory and module under the project in which the code below is saved.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Downloader middleware that gives each request a randomly chosen User-Agent."""

import random

try:
    from scrapy.downloadermiddlewares.useragent import UserAgentMiddleware
except ImportError:
    # Import path used by the original article (older Scrapy releases).
    from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware


class RotateUserAgentMiddleware(UserAgentMiddleware):
    """Set a random User-Agent header on outgoing requests.

    The header value is drawn from ``user_agent_list`` on every call to
    ``process_request``. Override ``user_agent_list`` in a subclass to use a
    different pool of strings.
    """

    def __init__(self, user_agent=''):
        """Store ``user_agent``; it is not used when picking the header."""
        self.user_agent = user_agent

    def process_request(self, request, spider):
        """Pick a random user agent and apply it to ``request``.

        Args:
            request: The outgoing request; its ``headers`` mapping is updated.
            spider: The spider issuing the request (unused).

        Returns:
            None in every case.
        """
        # An empty pool would make random.choice raise IndexError.
        if not self.user_agent_list:
            return None
        # Randomly select a user agent for this request.
        ua = random.choice(self.user_agent_list)
        if ua:
            # setdefault keeps a User-Agent that is already present on the request.
            request.headers.setdefault('User-Agent', ua)
        return None

    # Default pool of user-agent strings (all Chrome builds).
    # Every entry needs its own trailing comma: adjacent string literals are
    # concatenated by Python, which would silently merge two agents into one.
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    ]