Target site: http://www.xicidaili.com/
Code:
<?php
require 'lib/phpquery.php';
require 'lib/querylist.php';
require "db/shared/ez_sql_core.php";
require "db/mysql/ez_sql_mysql.php";
require "public/function.php";

use QL\QueryList;

// Crawl the proxy listing pages under http://www.xicidaili.com/nn/ and store
// every table row in the database.
//
// NOTE(review): the numeric literals of the published snippet were lost
// (the loop bounds read "<= -" and the speed thresholds read "-" / "the").
// The two loop bounds are reconstructed from the article title (10,000 rows);
// the two speed thresholds are assumed values - TODO confirm against the
// original project archive.
const PAGE_COUNT = 100;
const ROWS_PER_PAGE = 100;
const SPEED_GENERAL_MIN = 30;
const SPEED_FAST_MIN = 60;

// NOTE(review): credentials and schema name are kept exactly as published;
// their capitalisation may be an artefact of the snippet - verify locally.
$db = new ezSQL_mysql('Root', 'Root', 'Spider', 'localhost');

for ($page = 1; $page <= PAGE_COUNT; $page++) {
    $gurl = "http://www.xicidaili.com/nn/" . $page;
    // curl_request() is not defined here; presumably it comes from
    // public/function.php and returns the page HTML.
    $html = curl_request($gurl);

    // Row 0 of the table is skipped; extraction starts at row 1.
    for ($row = 1; $row <= ROWS_PER_PAGE; $row++) {
        getIpInfo($html, $row, $db);
    }

    echo "Section" . $page . "Page Complete" . PHP_EOL;
}

/**
 * Extract one row of the #ip_list table from a listing page and insert it
 * into the IP table.
 *
 * Nothing is inserted when the row selector yields no IP value.
 *
 * @param string $html Listing page HTML.
 * @param int    $t    Row index used in the tr:eq() selector.
 * @param object $db   Database handle; must provide escape() and query().
 *
 * @return void
 */
function getIpInfo($html, $t, $db)
{
    $rowSelector = "#ip_list tr:eq($t) ";

    $rules = [
        'ip'        => [$rowSelector . 'td:eq(1)', 'text'], // IP
        'port'      => [$rowSelector . 'td:eq(2)', 'text'], // port
        'area'      => [$rowSelector . 'td:eq(3)', 'text'], // location
        'anonymous' => [$rowSelector . 'td:eq(4)', 'text'], // is anonymous
        'type'      => [$rowSelector . 'td:eq(5)', 'text'], // type
        'speed'     => [$rowSelector . 'td:eq(6)', 'html', '', function ($content) {
            // Take the number between the first ':' and the following '%'
            // (presumably the width percentage of the speed bar - confirm
            // against the live markup). A cell without ':' counts as 0.
            $afterColon = explode(':', $content);
            $num = isset($afterColon[1]) ? (int) explode('%', $afterColon[1])[0] : 0;

            if ($num >= SPEED_FAST_MIN) {
                return "soon";
            }
            if ($num >= SPEED_GENERAL_MIN) {
                return "General";
            }
            return "more slowly";
        }], // speed
        'chtime'    => [$rowSelector . 'td:eq(8)', 'text'], // survival time
        'yztime'    => [$rowSelector . 'td:eq(9)', 'text'], // validation time
    ];

    $data = QueryList::Query($html, $rules)->data;
    print_r($data);

    // The selector matched nothing (e.g. the page has fewer rows than
    // requested), so there is nothing worth storing.
    if (empty($data[0]['ip'])) {
        return;
    }

    // The values come from a remote page, so escape each one before it is
    // placed in the SQL text.
    $values = [];
    foreach (['ip', 'port', 'area', 'anonymous', 'type', 'speed', 'chtime', 'yztime'] as $field) {
        $raw = isset($data[0][$field]) ? $data[0][$field] : '';
        $values[] = "'" . $db->escape($raw) . "'";
    }

    $db->query(
        'INSERT INTO IP (IP, port, area, anonymous, type, speed, chtime, yztime) VALUES ('
        . implode(', ', $values)
        . ')'
    );
}
Results:
Full project download: https://files.cnblogs.com/files/wordblog/spider2.rar
A simple PHP crawler: scraping 10,000 free proxy IPs