PHP uses the cURL functions to perform various file transfer operations, such as simulating a browser to send GET and POST requests. However, because PHP itself does not support multithreading, crawler programs developed this way are inefficient. You can instead use the curl_multi functions to access multiple URLs concurrently, capturing web pages or downloading files in parallel. The following examples show the specific implementation process:
(1) the following code captures multiple URLs and then writes the page code of the crawled URLs to the specified file.
// Example 1: fetch several URLs concurrently with curl_multi and stream
// every response body into a single output file.
$urls = array(
    'http://www.scutephp.com/',
    'http://www.google.com/',
    'http://www.example.com/'
); // URLs of the pages to crawl
$save_to = '/test.txt'; // the captured page code is written to this file
// "w" mode (truncate/create for writing) — the original passed an empty
// mode string, which is not a valid fopen() mode.
$st = fopen($save_to, "w");
$mh = curl_multi_init();
foreach ($urls as $i => $url) {
    $conn[$i] = curl_init($url);
    curl_setopt($conn[$i], CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)");
    curl_setopt($conn[$i], CURLOPT_HEADER, 0);
    curl_setopt($conn[$i], CURLOPT_CONNECTTIMEOUT, 60);
    curl_setopt($conn[$i], CURLOPT_FILE, $st); // stream the fetched body straight into the file
    curl_multi_add_handle($mh, $conn[$i]);
} // initialization
do {
    $status = curl_multi_exec($mh, $active);
    if ($active) {
        // Block until a transfer has activity instead of busy-looping at 100% CPU.
        curl_multi_select($mh);
    }
} while ($active && $status == CURLM_OK); // execute
foreach ($urls as $i => $url) {
    curl_multi_remove_handle($mh, $conn[$i]);
    curl_close($conn[$i]);
} // cleanup
curl_multi_close($mh);
fclose($st);
(2) the following code is similar to the above, except that the obtained code is first put into a variable, and then the obtained content is written into the specified file.
// Example 2: same concurrent fetch, but each response is captured into a
// string (CURLOPT_RETURNTRANSFER) and then written to the file afterwards.
$urls = array(
    'http://www.scutephp.com/',
    'http://www.google.com/',
    'http://www.example.com/'
);
$save_to = '/test.txt'; // the captured page code is written to this file
// "w" mode (truncate/create for writing) — the original passed an empty
// mode string, which is not a valid fopen() mode.
$st = fopen($save_to, "w");
$mh = curl_multi_init();
foreach ($urls as $i => $url) {
    $conn[$i] = curl_init($url);
    curl_setopt($conn[$i], CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)");
    curl_setopt($conn[$i], CURLOPT_HEADER, 0);
    curl_setopt($conn[$i], CURLOPT_CONNECTTIMEOUT, 60);
    // Return the body as a string instead of writing it to output.
    curl_setopt($conn[$i], CURLOPT_RETURNTRANSFER, true);
    curl_multi_add_handle($mh, $conn[$i]);
}
do {
    $status = curl_multi_exec($mh, $active);
    if ($active) {
        // Block until a transfer has activity instead of busy-looping at 100% CPU.
        curl_multi_select($mh);
    }
} while ($active && $status == CURLM_OK);
foreach ($urls as $i => $url) {
    $data = curl_multi_getcontent($conn[$i]); // fetched page code as a string
    fwrite($st, $data); // append the string to the output file
}
foreach ($urls as $i => $url) {
    curl_multi_remove_handle($mh, $conn[$i]);
    curl_close($conn[$i]);
}
curl_multi_close($mh);
fclose($st);
(3) the following code uses PHP Curl Functions to implement concurrent multi-threaded file download.
// Example 3: concurrent multi-file download with curl_multi.
// NOTE: the original article accidentally pasted this script twice,
// byte-for-byte; the duplicate copy has been removed.
$urls = array(
    'http://www.scutephp.com/5w.zip',
    'http://www.scutephp.com/5w.zip',
    'http://www.scutephp.com/5w.zip'
);
$save_to = './home/'; // target directory for the downloaded files
$mh = curl_multi_init();
foreach ($urls as $i => $url) {
    $g = $save_to . basename($url);
    // Skip files that already exist on disk. (URLs sharing a basename will
    // all write to the same path — fine for this demo, but worth noting.)
    if (!is_file($g)) {
        $conn[$i] = curl_init($url);
        $fp[$i] = fopen($g, "w");
        curl_setopt($conn[$i], CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)");
        curl_setopt($conn[$i], CURLOPT_FILE, $fp[$i]); // stream body straight to disk
        curl_setopt($conn[$i], CURLOPT_HEADER, 0);
        curl_setopt($conn[$i], CURLOPT_CONNECTTIMEOUT, 60);
        curl_multi_add_handle($mh, $conn[$i]);
    }
}
do {
    $status = curl_multi_exec($mh, $active);
    if ($active) {
        // Block until a transfer has activity instead of busy-looping at 100% CPU.
        curl_multi_select($mh);
    }
} while ($active && $status == CURLM_OK);
foreach ($urls as $i => $url) {
    // Handles are only created for files that did not already exist, so some
    // indices may be absent. The original unconditionally called
    // curl_multi_remove_handle()/curl_close()/fclose() on undefined entries,
    // which raises errors — guard with isset().
    if (isset($conn[$i])) {
        curl_multi_remove_handle($mh, $conn[$i]);
        curl_close($conn[$i]);
        fclose($fp[$i]);
    }
}
curl_multi_close($mh);