#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This script automates page clicks. Beyond that, it demonstrates three techniques:
1. Fetch proxy IPs at random and visit the target site through them, so the real IP does not get banned.
2. Sleep a random number of seconds between visits, to keep the layer-4/7 filtering devices in front of the site from blocking the requests.
3. Rotate the HTTP User-Agent header, because some sites and filtering devices inspect it.

Created on 2013-7-14
@author: qq136354553
"""
import urllib
import urllib2
import re
import time
import random
import user_agents

proxyIPurl = 'http://www.goodips.com/?ip=&port=&dengji=&adr=%E7%94%B5%E4%BF%A1&checktime=&sleep=1%E7%A7%92%E5%86%85&cunhuo=48%E5%B0%8F%E6%97%B6%E4%BB%A5%E4%B8%8A&px='
# url = 'http://blog.csdn.net/chenfei_5201213/article/details/6868634'

class GetProxyIP:
    """Grab proxy IPs from the listing page and normalize their format."""

    def getProxyHtml(self):
        # fetch the source of the proxy listing page
        page = urllib.urlopen(proxyIPurl)
        html = page.read()
        # print html
        return html

    def ipPortRe(self):
        # pull the proxy IPs and ports out of the page source
        html = self.getProxyHtml()
        # ip_re = re.compile(r'((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)')
        ip_re = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).+\n.+>(\d{1,5})<')
        ip_port = re.findall(ip_re, html)
        return ip_port

    def proxyIP(self):
        # format the proxy IPs and ports for urllib2
        ip_port = self.ipPortRe()
        # first as ['221.238.28.158:8081', '183.62.62.188:9999']
        proxyIP = []
        for i in range(0, len(ip_port)):
            proxyIP.append(':'.join(ip_port[i]))
        # then as [{'http': 'http://221.238.28.158:8081'}, {'http': 'http://183.62.62.188:9999'}]
        proxy_list = []
        for i in range(0, len(proxyIP)):
            a0 = 'http://%s' % proxyIP[i]
            a1 = {'http': '%s' % a0}
            proxy_list.append(a1)
        return proxy_list

def getHtml(url):
    p = GetProxyIP()
    proxy_list = p.proxyIP()
    proxy_ip = random.choice(proxy_list)    # pick a random proxy from proxy_list
    print proxy_ip
    proxy_support = urllib2.ProxyHandler(proxy_ip)
    opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler)
    urllib2.install_opener(opener)
    request = urllib2.Request(url)
    user_agent = random.choice(user_agents.user_agents)    # pick a random entry from user_agents
    request.add_header('User-Agent', user_agent)            # override the User-Agent header
    print user_agent
    html = urllib2.urlopen(request).read()
    print proxy_ip
    return proxy_ip

urls = ['http://www.xXxxw.net/study.asp?vip=',
        'http://www.xXxXxX.com/?fromuid=16',
        ]
count_true, count_false, count = 0, 0, 0
while True:
    for url in urls:
        count += 1
        try:
            proxy_ip = getHtml(url)
        except urllib2.HTTPError:           # HTTPError is a subclass of URLError, so catch it first
            # print 'HTTPError! The bad proxy is %s' % proxy_ip
            count_false += 1
        except urllib2.URLError:
            # print 'URLError! The bad proxy is %s' % proxy_ip
            count_false += 1
        except:
            # print 'Unknown error! The bad proxy is %s' % proxy_ip
            count_false += 1
        randomtime = random.uniform(1, 3)   # random float between 1 and 3 seconds
        time.sleep(randomtime)              # wait a random amount of time before the next request
        print '%d errors, %d OK, total %d' % (count_false, count - count_false, count)
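The script above is Python 2 only, since urllib2 was merged into urllib.request in Python 3. A minimal Python 3 sketch of the same getHtml() idea, assuming a proxy_list and user_agents list built as above (the fetch name and its parameters are illustrative, not part of the original script):

import random
import urllib.request

def fetch(url, proxy_list, user_agents):
    proxy = random.choice(proxy_list)              # e.g. {'http': 'http://221.238.28.158:8081'}
    opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
    request = urllib.request.Request(url, headers={'User-Agent': random.choice(user_agents)})
    return opener.open(request, timeout=10).read()

The user_agents module imported by the main script is just a list of User-Agent strings: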
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Created on 2013-7-14

@author: Administrator
"""

user_agents = [
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GnuTLS/1.2.9',
]
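The main loop only counts a bad proxy after it has already caused an error. An optional pre-check, sketched below under the assumption that a proxy is usable if it can fetch a known page within a few seconds (check_proxy and the example.com test URL are illustrative, not part of the original script), could be used to filter the scraped list first:

import urllib.request

def check_proxy(proxy, test_url='http://example.com', timeout=5):
    # return True if the proxy can fetch test_url within the timeout
    opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
    try:
        opener.open(test_url, timeout=timeout)
        return True
    except Exception:
        return False

# keep only the proxies that actually respond, e.g.:
# proxy_list = [p for p in proxy_list if check_proxy(p)]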
Inflating web page click counts with proxies