Nginx服务器屏蔽与禁止屏蔽网络爬虫的方法

【Nginx服务器屏蔽与禁止屏蔽网络爬虫的方法】

# Virtual host that rejects requests from known crawlers (by User-Agent)
# and reverse-proxies everything else to a backend on localhost:8080.
server {
    listen       80;
    server_name  www.xxx.com;

    #charset koi8-r;

    #access_log  logs/host.access.log  main;

    #location / {
    #    root   html;
    #    index  index.html index.htm;
    #}

    # Block crawlers: "~*" is a case-insensitive regex match against the
    # User-Agent request header. Any request whose User-Agent contains one
    # of the alternatives below is answered with 403 Forbidden.
    # To stop blocking a given crawler, remove its name from the list.
    if ($http_user_agent ~* "qihoobot|Baiduspider|Googlebot|Googlebot-Mobile|Googlebot-Image|Mediapartners-Google|Adsbot-Google|Feedfetcher-Google|Yahoo! Slurp|Yahoo! Slurp China|YoudaoBot|Sosospider|Sogou spider|Sogou web spider|MSNBot|ia_archiver|Tomato Bot") {
        return 403;
    }

    # Regex location matching every URI; requests are proxied to the backend.
    location ~ ^/(.*)$ {
        proxy_pass        http://localhost:8080;
        proxy_redirect    off;

        # Pass the original host and client address on to the backend.
        proxy_set_header  Host             $host;
        proxy_set_header  X-Real-IP        $remote_addr;
        proxy_set_header  X-Forwarded-For  $proxy_add_x_forwarded_for;

        # Request body limits.
        client_max_body_size     10m;
        client_body_buffer_size  128k;

        # Timeouts (seconds) for connecting to, sending to and reading
        # from the proxied server.
        proxy_connect_timeout  90;
        proxy_send_timeout     90;
        proxy_read_timeout     90;

        # Buffering of the proxied response.
        proxy_buffer_size           4k;
        proxy_buffers               4 32k;
        proxy_busy_buffers_size     64k;
        proxy_temp_file_write_size  64k;
    }

    #error_page  404              /404.html;

    # redirect server error pages to the static page /50x.html
    #
    error_page   500 502 503 504  /50x.html;
    location = /50x.html {
        root   html;
    }

    # proxy the PHP scripts to Apache listening on 127.0.0.1:80
    #
    #location ~ \.php$ {
    #    proxy_pass   http://127.0.0.1;
    #}

    # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
    #
    #location ~ \.php$ {
    #    root           html;
    #    fastcgi_pass   127.0.0.1:9000;
    #    fastcgi_index  index.php;
    #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
    #    include        fastcgi_params;
    #}

    # deny access to .htaccess files, if Apache's document root
    # concurs with nginx's one
    #
    #location ~ /\.ht {
    #    deny  all;
    #}
}