Python crawler, part 1: crawling proxy server addresses from a proxy-list website
I have only just started learning, so I know only a little about regular expressions and the script can only crawl a single page. Testing of the collected proxies will be added later.
# coding: UTF-8
"""Crawl one listing page of xicidaili.com and print each proxy as ip:port.

The script runs on Python 2 (``urllib2``) and on Python 3
(``urllib.request``).  Nothing is fetched at import time; run the file as a
script to perform the crawl.
"""

import re
import urllib
from contextlib import closing

try:  # Python 2
    import urllib2 as url_request
    from urllib2 import URLError
except ImportError:  # Python 3
    import urllib.request as url_request
    from urllib.error import URLError

# Capture proxy server addresses.
Key = 1  # value substituted into the listing URL (only one page is crawled)
url = 'http://www.xicidaili.com/nt/%s' % Key

user_agent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
headers = {'User-Agent': user_agent}

# A table row starts with a <td class="country"> cell; the next plain <td>
# holds the dotted IPv4 address and the one after it the port.  re.S lets
# ".*?" span the newlines between cells.
PROXY_PATTERN = re.compile(
    r'<td class="country">.*?'
    r'<td>(\d+)\.(\d+)\.(\d+)\.(\d+)</td>.*?'
    r'<td>(\d+)</td>',
    re.S
)


def parse_proxies(html):
    """Return the proxies found in *html* as a list of ``"a.b.c.d:port"``.

    Rows that do not match the expected cell layout are skipped, so an empty
    or unrelated document yields an empty list.
    """
    return ['%s.%s.%s.%s:%s' % fields for fields in PROXY_PATTERN.findall(html)]


def fetch_page(page_url=None, request_headers=None):
    """Download *page_url* and return its body as text.

    Both arguments default to the module-level ``url`` and ``headers``.
    Raises ``URLError`` (or its subclass ``HTTPError``) when the request
    fails.
    """
    if page_url is None:
        page_url = url
    if request_headers is None:
        request_headers = headers

    request = url_request.Request(page_url, headers=request_headers)
    # closing() guarantees the connection is released even if read() fails.
    with closing(url_request.urlopen(request)) as response:
        body = response.read()

    # Python 3 returns bytes; the pattern is a text pattern.  Only ASCII
    # digits and tags are matched, so undecodable bytes can be replaced
    # without affecting the result.
    if not isinstance(body, str):
        body = body.decode('utf-8', 'replace')
    return body


def main():
    """Fetch the listing page and print one ``ip:port`` per line.

    On a failed request the HTTP status code and/or the failure reason are
    printed instead, whichever the exception provides.
    """
    try:
        html = fetch_page()
    except URLError as err:
        if hasattr(err, 'code'):
            print(err.code)
        if hasattr(err, 'reason'):
            print(err.reason)
        return

    for proxy in parse_proxies(html):
        print(proxy)


if __name__ == '__main__':
    main()
Output
112.112.95.25:9999113.66.236.53:979714.221.165.46:9797123.121.79.213:9000219.133.10.211:9797113.109.248.12:979727.46.48.187:9797115.183.11.158:9999112.93.208.231:8080113.78.254.84:9000121.35.243.157:808042.157.5.154:9999218.75.144.25:9000113.65.8.221:9999218.56.132.158:808059.59.144.135:53281119.129.96.33:9797115.213.60.99:53281221.237.154.58:9797120.86.180.173:9797112.250.65.222:5328127.37.22.243:9000123.138.89.133:9999175.171.184.36:53281113.76.96.161:9797183.29.130.106:9000119.90.63.3:3128175.171.186.171:53281183.184.194.15:9797218.241.234.48:8080113.200.159.155:9999218.6.145.11:9797218.56.132.156:8080223.199.175.107:80814.221.166.140:9000220.249.185.178:9999122.72.18.34:80139.224.24.26:8888122.72.18.60:8061.163.139.168:9797202.120.46.180:443122.72.18.61:80125.45.87.12:9999116.85.24.26:8080222.86.191.44:8080112.74.94.142:312861.163.139.168:9797114.255.212.17:808118.178.228.175:3128122.72.18.35:80101.37.79.125:3128113.89.52.86:9999113.118.96.132:9797101.81.142.10:900061.155.164.106:3128114.115.140.25:3128171.37.176.140:979758.252.6.165:900061.163.39.70:9999121.8.170.53:9797175.174.118.141:8080118.119.168.172:9999171.37.143.140:9797119.39.68.212:808124.90.30.103:811859.38.61.23:97971.196.161.163:9999113.116.76.212:8088122.136.212.132:53281203.174.112.13:3128221.217.49.196:900014.29.84.50:8080175.17.156.139:8080175.17.174.218:9000114.221.125.161:8118123.139.56.238:9999113.87.163.152:808101.6.33.113:812361.155.164.112:3128180.140.161.138:9797221.7.49.209:53281120.9.75.45:9999183.184.112.78:9797116.236.151.166:8080119.122.2.160:9000119.129.96.142:9797116.52.195.113:999961.155.164.109:3128112.86.248.163:8118115.171.47.184:9000116.30.218.76:9000123.7.38.31:9999218.29.111.106:9999114.101.35.113:54214124.89.33.75:9999114.254.4.208:9797183.54.192.211:9797218.17.8.110:8118183.30.201.123:9797119.123.244.95:9000***Repl Closed***