The code is as follows:
# Global configuration (http context).
# Declares a 10 MB shared-memory zone named "anti_spider" that tracks request
# rates keyed on the value of $anti_spider, allowing 15 requests per minute
# per key. Requests whose key is empty are not accounted, so only requests
# for which $anti_spider has been set below are rate-limited.
limit_req_zone $anti_spider zone=anti_spider:10m rate=15r/m;

# Inside a server block.
# Apply the zone: up to 30 requests above the rate are let through
# immediately (nodelay); anything beyond that is rejected.
limit_req zone=anti_spider burst=30 nodelay;

# Case-insensitive match on the User-Agent header. Replace the placeholder
# names with the spiders/bots you want to throttle. Matching requests get a
# non-empty key (their User-Agent string) and therefore become rate-limited.
if ($http_user_agent ~* "Xxspider|xxbot") {
    set $anti_spider $http_user_agent;
}
When a spider exceeds the configured request rate, nginx answers it with a 503 response.
For a detailed explanation of the configuration above, please search online; replace the spider/bot names with the ones you actually want to limit.
Appendix: blocking web crawlers in Nginx
The code is as follows:
# Virtual host that rejects known crawler User-Agents with 403 and proxies
# every other request to a backend on localhost:8080.
server {
    listen 80;
    server_name www.xxx.com;

    #charset koi8-r;
    #access_log logs/host.access.log main;

    #location / {
    #    root html;
    #    index index.html index.htm;
    #}

    # Case-insensitive match on the User-Agent header. The alternatives are
    # separated by "|" with no padding: any space written next to a "|"
    # would become part of the pattern. Spaces inside a name (e.g.
    # "Sogou spider") are intentional.
    if ($http_user_agent ~* "qihoobot|baiduspider|Googlebot|googlebot-mobile|googlebot-image|mediapartners-google|adsbot-google|Feedfetcher-google|yahoo! slurp|yahoo! slurp china|Youdaobot|sosospider|Sogou spider|Sogou Web spider|msnbot|ia_archiver|Tomato Bot") {
        return 403;
    }

    # Regex location matching every URI; forwards the request to the backend.
    location ~ ^/(.*)$ {
        proxy_pass http://localhost:8080;
        proxy_redirect off;

        # Pass the original host and client address on to the backend.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

        client_max_body_size 10m;
        client_body_buffer_size 128k;

        proxy_connect_timeout 90;
        proxy_send_timeout 90;
        proxy_read_timeout 90;

        proxy_buffer_size 4k;
        proxy_buffers 4 32k;
        proxy_busy_buffers_size 64k;
        proxy_temp_file_write_size 64k;
    }

    #error_page 404 /404.html;

    # Redirect server error pages to the static page /50x.html
    #
    error_page 502 503 504 /50x.html;
    location = /50x.html {
        root html;
    }

    # Proxy the PHP scripts to Apache listening on 127.0.0.1:80
    #
    #location ~ \.php$ {
    #    proxy_pass http://127.0.0.1;
    #}

    # Pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
    #
    #location ~ \.php$ {
    #    root html;
    #    fastcgi_pass 127.0.0.1:9000;
    #    fastcgi_index index.php;
    #    fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
    #    include fastcgi_params;
    #}

    # Deny access to .htaccess files, if Apache's document root
    # concurs with nginx's one
    #
    #location ~ /\.ht {
    #    deny all;
    #}
}
You can test it with curl; a request sent with a blocked User-Agent should receive a 403 response.
The command is as follows:
# Send a request with a spoofed User-Agent (-A) and print the response
# headers (-i) so the returned status code is visible.
curl -i -A "Qihoobot" www.xxx.com