I helped a friend scrape some proxy IPs, test their connectivity, and save the results to separate files accordingly. I am sharing the source code here.
Notes:
1. Environment: Python 3.5
2. Install the beautifulsoup4 and requests packages
The code is as follows:
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 66 |
#-*- coding:gb18030 -*-
import os
import subprocess
import sys
import time

import requests
from bs4 import BeautifulSoup
# Base URLs of the proxy-list sites to crawl, keyed by a short label.
# The page number is appended directly to each URL by the driver loop.
all_url_add = {
    'url2': 'http://ip84.com/gn/',
}
def func(url):
    """Scrape one proxy-list page and append every proxy found to ``ip.txt``.

    The page at *url* is downloaded and parsed. Every row of each
    ``<table class="list">`` that has at least seven ``<td>`` cells is read
    as: IP address, port, region, type, protocol, speed, check time. Each
    proxy is appended to ``ip.txt`` as ``ip:port`` and echoed to stdout.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        The string ``'success'`` once the page has been processed.

    Raises:
        requests.RequestException: If the page cannot be downloaded,
            including when the request times out.
    """
    # A timeout keeps one unresponsive page from hanging the whole crawl.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")

    # Open the output file once per page instead of once per table row.
    with open("ip.txt", "a") as out:
        for table in soup.find_all("table", class_="list"):
            for tr in table.find_all("tr"):
                cells = [td.get_text().strip() for td in tr.find_all("td")]
                if len(cells) < 7:
                    # Rows without the seven data cells (e.g. a header row)
                    # carry no proxy entry; skip them instead of reporting
                    # an error.
                    continue
                ipaddress, port, city, leixing, xieyi, shudu, time1 = cells[:7]
                city = city.replace("\n", "")
                try:
                    out.write(ipaddress + ":" + port + "\n")
                    print('地址:' + ipaddress + "端口:" + port
                          + "地区:" + city + "类型:" + leixing
                          + "协议" + xieyi + "速度" + shudu
                          + "时间:" + time1)
                except (OSError, UnicodeError) as e:
                    # Best effort per row: a failed write or an unprintable
                    # character must not abort the rest of the page.
                    print("-------------------程序异常-----------------------", e)

    # This message was placed after the ``return`` and could never run.
    print('本页抓取结束,正在跳转下一页')
    return 'success'
def pin():
    """Ping every proxy listed in ``ip.txt`` and record the reachable ones.

    Each non-blank line of ``ip.txt`` is read as ``ip:port``. The part before
    the first colon is pinged with ``ping -n 5 -w 5`` (Windows option
    syntax). Entries whose ping exits with status 0 are appended to
    ``SuccessIp.txt``, one entry per line.

    Raises:
        OSError: If ``ip.txt`` cannot be opened (e.g. it does not exist).
    """
    with open('ip.txt', 'r') as source:
        for line in source:
            entry = line.strip()
            if not entry:
                # Blank lines carry no address to test.
                continue
            ip = entry.split(':')[0]
            try:
                # Pass an argument list (no shell): the address comes from
                # scraped data and must not be interpreted by a shell.
                return1 = subprocess.call(['ping', '-n', '5', '-w', '5', ip])
            except OSError:
                # ``ping`` could not be started; count it as a failed test,
                # as the non-zero status from ``os.system`` did before.
                return1 = 1
            if return1:
                print('测试失败')
            else:
                print('测试成功,正在写入新文件')
                # Write the entry that was just tested. Reading another line
                # here would save the wrong proxy and skip testing it.
                with open('SuccessIp.txt', 'a') as f3:
                    f3.write(entry + '\n')
    print('程序结束,可用IP已放在SuccessIp中')
if __name__ == '__main__':
    # Crawl listing pages 1-49 of every configured site, then ping-test the
    # collected addresses.
    for x, base_url in all_url_add.items():
        print(x)
        for y in range(1, 50):
            # The page number is appended directly to the base URL.
            url = base_url + str(y)
            print(url)
            status = func(url)
            if status == 'success':
                print(y, '页结束')
    print('****程序抓取运行结束,正在检查所得IP连通性,请勿关闭窗口*****')
    pin()
|
The code is a little messy. When I have time, I plan to store the data in a database and then integrate this functionality into the blog.
Rex Blog All rights reserved
Python crawler crawls proxy IP and detects connectivity