Python Learning: Building an IP Proxy Pool

Source: Internet
Author: User

Code:

"""Scrape the free proxy listing on kuaidaili.com into a local text file.

Each listing page is fetched with its own HTTP session, the table rows are
parsed with Beautiful Soup, and every proxy found is printed and appended to
``OUTPUT_PATH`` as one fixed-width line.
"""
from bs4 import BeautifulSoup
from requests import RequestException, Session, get, post
from time import sleep
import random
import re
import os

# First listing page; later pages live under ``<BASE_URL>inha/<page>/``.
BASE_URL = 'https://www.kuaidaili.com/free/'

# Default file the proxies are appended to.
OUTPUT_PATH = 'C:/Users/adimin/Desktop/proxyip.txt'

# Last listing page crawled by the script entry point.
LAST_PAGE = 2176

# Result key -> value of the ``data-title`` attribute on the matching <td>.
# NOTE(review): these must equal the site's attribute values verbatim. The
# article this script came from looks machine-translated, so 'type' and
# 'location' are presumably translated labels -- confirm against the live page.
COLUMN_TITLES = {
    'ip': 'IP',
    'port': 'PORT',
    'type': 'type',
    'position': 'location',
}

REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,'
              'image/webp,image/apng,*/*;q=0.8',
    # 'br' is deliberately not advertised: requests only decodes brotli when
    # an optional brotli package is installed, otherwise the body is garbage.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Host': 'www.kuaidaili.com',
    # 'Referer': url,  # when paging, the Referer is the previously visited
    #                  # page (e.g. the search engine for the first page)
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/64.0.3282.168 Safari/537.36',
}


class ProxyIpPool(object):
    """Fetch and parse a single page of the proxy listing.

    Args:
        page: 1-based number of the listing page to fetch.
    """

    def __init__(self, page):
        super().__init__()
        self.page = page

    def init_proxy_ip_pool(self):
        """Download ``self.page`` and extract its proxy table.

        Returns:
            A dict with the keys ``'ip'``, ``'port'``, ``'type'`` and
            ``'position'``. Each value is a list holding ONE inner list of
            strings (one entry per table row), which is the shape
            ``create_proxy_ip_pool`` indexes with ``[0]``.

        Raises:
            requests.RequestException: if the page cannot be fetched within
                the 2 second timeout or the connection fails.
        """
        url = BASE_URL
        if self.page > 1:
            url = url + 'inha/' + str(self.page) + '/'

        # The context manager closes the session's pooled connections.
        with Session() as session:
            response = session.get(url, headers=REQUEST_HEADERS, timeout=2)
        print(response.status_code)

        soup = BeautifulSoup(response.text, 'lxml')
        wanted_titles = list(COLUMN_TITLES.values())
        columns = {key: [] for key in COLUMN_TITLES}

        # Extract row by row so the four columns always stay aligned; pulling
        # each column independently lets one empty cell shift every later row.
        # Assumes the cells sit inside <tr> rows -- TODO confirm on the page.
        for row in soup.find_all('tr'):
            cells = {
                cell['data-title']: cell.get_text(strip=True)
                for cell in row.find_all('td',
                                         attrs={'data-title': wanted_titles})
            }
            if len(cells) != len(wanted_titles):
                continue  # header row or incomplete record
            for key, title in COLUMN_TITLES.items():
                columns[key].append(cells[title])

        # Random pause of up to 7 seconds between page fetches.
        sleep(random.random() * 7)

        return {key: [values] for key, values in columns.items()}


def create_proxy_ip_pool(page, path=OUTPUT_PATH):
    """Scrape one listing page, print its proxies and append them to a file.

    Args:
        page: 1-based number of the listing page to scrape.
        path: File the formatted lines are appended to.

    Raises:
        requests.RequestException: propagated from the page download.
        SystemExit: with status 2 if the output file cannot be written.
    """
    pool = ProxyIpPool(page).init_proxy_ip_pool()
    print('Initialize complete! to start creating Agent pool ...')

    records = zip(pool['ip'][0], pool['port'][0],
                  pool['type'][0], pool['position'][0])
    lines = [
        format(ip, '<22') + format(port, '<17') + format(kind, '<12') + position
        for ip, port, kind, position in records
    ]
    for line in lines:
        print(line)

    try:
        # Open once per page instead of once per row. newline='' keeps the
        # explicit '\r\n' from being expanded to '\r\r\n' on Windows.
        with open(path, 'a', encoding='utf-8', newline='') as fp:
            fp.writelines(line + '\r\n' for line in lines)
    except OSError as err:
        print(err)
        raise SystemExit(2) from err


if __name__ == '__main__':
    print('Initializing Agent pool ... Please be patient ...')
    header = (format('IP', '^16') + format('PORT', '^16')
              + format('type', '^16') + format('location', '^16'))
    print(header)

    # Mode 'a' creates the file when it is missing, so no 'w' fallback is
    # needed.
    with open(OUTPUT_PATH, 'a', encoding='utf-8', newline='') as fp:
        fp.write(header + '\r\n')

    # Pages are crawled one call at a time: each ProxyIpPool fetches exactly
    # one page and create_proxy_ip_pool reads only the first inner list of
    # each column.
    for page in range(1, LAST_PAGE + 1):
        try:
            create_proxy_ip_pool(page)
        except RequestException as err:
            # A timeout on one page should not abort the remaining pages.
            print(err)

Operation Result:

Save to Local:

Python Learning: Building an IP Proxy Pool

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.