To use multi-threading in PHP you need PHP 5.3 or later with the pthreads extension installed; this gives PHP real multi-threading support. For instructions on installing the extension, please search online (for example, on Baidu).
PHP Extensions Download: https://github.com/krakjoe/pthreads
PHP Manual Document: http://php.net/manual/zh/book.pthreads.php
Once the extension is installed you can use multi-threading. Below is example code that crawls Baidu network-disk (pan.baidu.com) content by way of Baidu search results:
<?php
// Load the cURL helper that provides Curlload::htmlget() and Curlload::get_headers().
// NOTE(review): include paths are case-sensitive on most systems - confirm the
// helper's real file name.
include 'include/curlload.class.php';

/**
 * Crawl the resource pages linked from a search-results page, one thread per link.
 *
 * The URL is handed to Baidusrlinksget() (with $format = 1) to obtain the list of
 * result links; a Baidu_thread_run worker is then started for every link and the
 * data collected by each worker is gathered into the returned array.
 *
 * @param string $url Search-results page URL, passed unchanged to Baidusrlinksget().
 * @return array|null On success an array with the keys
 *                    'res' (list of Paninfoget() results, present only if any worker
 *                    produced data), 'pages' (pagination map from Baidusrlinksget(),
 *                    or null when none was found) and 'status' (the string "1");
 *                    null when no result links could be obtained.
 */
function Vget($url)
{
    $ret = Baidusrlinksget($url, 1); // get the result list, resolved to real addresses
    if ($ret == null || !array_key_exists('links', $ret)) {
        return null;
    }

    // Start one worker per link. Initialised up front so that an empty link list
    // does not leave the variable undefined for the loop below.
    $threads = array();
    foreach ($ret['links'] as $index => $link) {
        $threads[$index] = new Baidu_thread_run($link);
        $threads[$index]->start();
    }

    $infos = array();
    foreach ($threads as $thread) {
        // join() blocks until the worker has finished, so no polling loop is needed.
        if ($thread->join()) {
            $data = $thread->data;
            if ($data != null) {
                $infos['res'][] = $data;
            }
        }
    }

    // 'pages' is only set by Baidusrlinksget() when pagination links were found.
    $infos['pages'] = isset($ret['pages']) ? $ret['pages'] : null;
    $infos['status'] = "1";

    return $infos;
}

/**
 * Extract the result links and pagination links from a search-results page.
 *
 * @param string $url    Search-results page URL.
 * @param int    $format 0 (default) returns the links exactly as found in the page;
 *                       1 replaces each link by the 'Location' header returned for
 *                       it (the real address after the redirect), keeping the
 *                       original link when no such header is present.
 * @return array|null Array with 'links' (list of URLs) and, when pagination links
 *                    were found, 'pages' (page index => base64-encoded URL fragment);
 *                    null when the page could not be fetched or parsed.
 */
function Baidusrlinksget($url, $format = 0)
{
    $html = Curlload::htmlget($url); // fetch the page
    if ($html == null) {
        return null;
    }

    try {
        // Filter the search-result links out of the page.
        preg_match_all('/"url":"(?<links>.*)"}/', $html, $rets);
        if (!array_key_exists('links', $rets)) {
            // Without the 'links' key the extraction failed.
            return null;
        }

        $ret = array();
        if ($format == 1) {
            foreach ($rets['links'] as $index => $link) {
                // Resolve the real address through the response headers.
                $headers = Curlload::get_headers($link, 1);
                if (is_array($headers) && array_key_exists('Location', $headers)) {
                    $ret['links'][$index] = $headers['Location'];
                } else {
                    // Fall back for this entry only; replacing the whole list here
                    // would throw away the addresses that were already resolved.
                    $ret['links'][$index] = $link;
                }
            }
        } else {
            $ret['links'] = $rets['links'];
        }

        // Pagination links. '#' is used as the delimiter because the pattern itself
        // contains '/'; the 'i' modifier accepts both %3a and %3A.
        preg_match_all(
            '#href="?/s\?wd=site%3apan\.baidu\.com%20(?<url>.+?)&ie=utf-8">#i',
            $html,
            $out
        );
        $pageUrls = isset($out['url']) ? $out['url'] : array();
        unset($pageUrls[0]);

        // NOTE(review): the count is taken after index 0 has been removed, so this
        // loop stops one short of the last captured link. Kept as in the original -
        // confirm whether skipping the final entry is intended.
        $number = count($pageUrls);
        for ($i = 1; $i < $number; $i++) {
            if (preg_match('/&pn=(.*)/', $pageUrls[$i], $match) !== 1) {
                continue; // no offset parameter in this link
            }
            // The offset is divided by 10 to get the page index; the casts make the
            // integer array key explicit.
            $page = (int) (((int) $match[1]) / 10);
            $ret['pages'][$page] = base64_encode($pageUrls[$i]);
        }

        return $ret;
    } catch (Exception $e) {
        Writelog($e);
        return null;
    }
}

/**
 * Read the resource information from a network-disk resource page.
 *
 * @param string $url Resource page URL.
 * @return array|null Array with the keys 'name', 'size', 'user', 'date', 'number'
 *                    and 'link' (the given URL); null when the page could not be
 *                    fetched or the expected fields were not found.
 */
function Paninfoget($url)
{
    $html = Curlload::htmlget($url); // fetch the page
    if ($html == null) {
        return null;
    }

    try {
        // NOTE(review): the literal field labels below look machine-translated;
        // they must match the text of the real page - verify before use.
        $pattern = '/File name: (?<name>.*) file Size: (?<size>.*) shared by: (?<user>.*)'
            . ' Share time: (?<date>.*) Download number of times: (?<number>[0-9]+)/';
        if (preg_match_all($pattern, $html, $ret) == 0) {
            return null;
        }

        // Only the first match on the page is used.
        $rets = array();
        $rets['name'] = $ret['name'][0];
        $rets['size'] = $ret['size'][0];
        $rets['user'] = $ret['user'][0];
        $rets['date'] = $ret['date'][0];
        $rets['number'] = $ret['number'][0];
        $rets['link'] = $url;

        return $rets;
    } catch (Exception $e) {
        Writelog($e);
        return null;
    }
}

/**
 * Append a warning line to ../error.log.
 *
 * @param mixed $str Message to log; it is concatenated into the line, so objects
 *                   (such as the exceptions passed by the callers in this file)
 *                   are converted to strings.
 * @return void
 */
function Writelog($str)
{
    $file = fopen('../error.log', 'a+');
    if ($file === false) {
        return; // the log cannot be opened; nothing more can be done here
    }

    // 24-hour clock ('H') so that the timestamps are unambiguous.
    fwrite($file, "Warning:" . date("Y/m/d H:i:s") . ":" . $str . "\r\n");
    fclose($file);
}

/**
 * Worker thread that fetches the resource information for a single URL.
 *
 * @author Muxi
 */
class Baidu_thread_run extends Thread
{
    /** URL of the resource page this worker processes. */
    public $url;

    /** Result of Paninfoget() for the URL; set by run(). */
    public $data;

    /**
     * @param string $url Resource page URL to process.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread body: fetch the resource information when a URL was given.
     */
    public function run()
    {
        if (($url = $this->url)) {
            $this->data = Paninfoget($url);
        }
    }
}
?>
PHP multithreading: Thread development and usage example