標籤:
使用 PHP 多線程,首先需要 PHP 5.3 以上的線程安全(ZTS)版本,並安裝 pthreads 擴充,才能讓 PHP 真正支援多線程;擴充的安裝方式請自行搜尋(百度)。
PHP擴充下載:https://github.com/krakjoe/pthreads
PHP手冊文檔:http://php.net/manual/zh/book.pthreads.php
在安裝好擴充之後,就可以運用多線程了,下面貼個通過搜尋結果抓取百度網盤內容的代碼:
<?php
include 'include/CurlLoad.class.php'; // HTTP helper library (provides the CurlLoad class)

/**
 * Collect Baidu Pan resource details for every link found on a search-result
 * page, fetching each resource page in its own thread.
 *
 * @param string $url Search-result page URL; forwarded unchanged to BaiduSRLinksGet().
 * @return array|null NULL when no links could be extracted. Otherwise an array with:
 *                    'res'    => list of PanInfoGet() results (key is present only
 *                                when at least one thread produced data),
 *                    'pages'  => the 'pages' map from BaiduSRLinksGet(), or NULL,
 *                    'status' => the string "1".
 */
function vget($url)
{
    $ret = BaiduSRLinksGet($url, 1); // 1 = resolve each link to its redirect target
    if ($ret == null || !array_key_exists('links', $ret)) {
        return null;
    }

    // Start one worker per link. The list is initialised up front so that an
    // empty link set simply yields no workers.
    $workers = array();
    foreach ($ret['links'] as $link) {
        $worker = new baidu_thread_run($link);
        $worker->start();
        $workers[] = $worker;
    }

    // join() waits for the worker to finish, so no separate polling of
    // isRunning() is needed before reading the result.
    $infos = array();
    foreach ($workers as $worker) {
        if ($worker->join()) {
            $data = $worker->data;
            if ($data != null) {
                $infos['res'][] = $data;
            }
        }
    }

    $infos['pages'] = isset($ret['pages']) ? $ret['pages'] : null;
    $infos['status'] = "1";

    return $infos;
}

/**
 * Extract result links and pager links from a Baidu search-result page.
 *
 * @param string $url    Search-result page URL.
 * @param int    $format 0 (default) returns the links exactly as they appear in
 *                       the page; 1 replaces each link with the "Location" header
 *                       reported by CurlLoad::Get_Headers() when one is present.
 * @return array|null NULL when the page could not be loaded, no links matched, or
 *                    an Exception was caught (it is logged through WriteLog()).
 *                    Otherwise an array with:
 *                    'links' => list of result URLs,
 *                    'pages' => map of (pn / 10) => base64-encoded pager URL
 *                               (key is absent when no pager link was processed).
 */
function BaiduSRLinksGet($url, $format = 0)
{
    $html = CurlLoad::HtmlGet($url);
    if ($html == null) {
        return null;
    }

    try {
        // Lazy quantifier so that several "url":"..."} entries on one line are
        // captured individually instead of being merged into a single match.
        preg_match_all('/"url":"(?<links>.*?)"}/', $html, $rets);
        if (empty($rets['links'])) {
            return null; // nothing matched: treat as a failed extraction
        }

        $ret = array('links' => array());
        foreach ($rets['links'] as $i => $link) {
            $resolved = $link;
            if ($format == 1) {
                $headers = CurlLoad::Get_Headers($link, 1);
                // Fall back to the original link for THIS entry only when no
                // redirect target is reported, leaving other entries untouched.
                if (is_array($headers) && isset($headers['Location'])) {
                    $resolved = $headers['Location'];
                }
            }
            $ret['links'][$i] = $resolved;
        }

        // NOTE(review): the escapes in this pattern were reconstructed (literal
        // "/s?wd=" and literal dots) - confirm against a live result page.
        preg_match_all(
            '#href="?/s\?wd=site%3Apan\.baidu\.com%20(?<url>.+?)&ie=utf-8">#',
            $html,
            $out
        );
        $pageUrls = isset($out['url']) ? $out['url'] : array();
        unset($pageUrls[0]); // the first match is discarded

        // NOTE(review): the bound is the count taken AFTER removing element 0,
        // so the last match is never visited. Kept as-is; confirm whether
        // skipping the final pager link is intended.
        $number = count($pageUrls);
        for ($i = 1; $i < $number; $i++) {
            // Use only the leading digits of "pn"; a link without "&pn=" maps to page 0.
            $offset = preg_match('/&pn=(\d+)/', $pageUrls[$i], $m) ? (int) $m[1] : 0;
            $ret['pages'][(int) ($offset / 10)] = base64_encode($pageUrls[$i]);
        }

        return $ret;
    } catch (Exception $e) {
        WriteLog($e);
        return null;
    }
}

/**
 * Read the metadata shown on a Baidu Pan resource page.
 *
 * @param string $url Resource page URL.
 * @return array|null NULL when the page could not be loaded, the expected text was
 *                    not found, or an Exception was caught (it is logged through
 *                    WriteLog()). Otherwise an array with the keys 'name', 'size',
 *                    'user', 'date', 'number' (taken from the first match) and
 *                    'link' (the given $url).
 */
function PanInfoGet($url)
{
    $html = CurlLoad::HtmlGet($url);
    if ($html == null) {
        return null;
    }

    try {
        // NOTE(review): the labels in this pattern must match the page text
        // byte-for-byte; verify them (script variant, separators) against a
        // real resource page.
        $pattern = "/檔案名稱:(?<name>.*) 檔案大小:(?<size>.*) 分享者:(?<user>.*) 分享時間:(?<date>.*) 下載次數:(?<number>[0-9]+)/";
        if (!preg_match($pattern, $html, $m)) {
            return null;
        }

        return array(
            'name'   => $m['name'],
            'size'   => $m['size'],
            'user'   => $m['user'],
            'date'   => $m['date'],
            'number' => $m['number'],
            'link'   => $url,
        );
    } catch (Exception $e) {
        WriteLog($e);
        return null;
    }
}

/**
 * Append one timestamped line to ../error.log (path relative to the current
 * working directory).
 *
 * @param mixed $str Message to record; it is converted by string concatenation,
 *                   so an Exception is written via its __toString() output.
 * @return void
 */
function WriteLog($str)
{
    $line = "Warning:" . date("Y/m/d H:i:s") . ":" . $str . "\r\n";
    // This function is reachable from worker threads (via PanInfoGet), so take
    // an exclusive lock for the duration of the append.
    file_put_contents("../error.log", $line, FILE_APPEND | LOCK_EX);
}

/**
 * Worker thread that fetches the details of a single Baidu Pan resource page.
 *
 * @author MuXi
 */
class baidu_thread_run extends Thread
{
    /** Resource page URL to fetch. */
    public $url;

    /** Value returned by PanInfoGet(); stays unset if run() had no URL to work on. */
    public $data;

    /**
     * @param string $url Resource page URL to fetch.
     */
    public function __construct($url)
    {
        $this->url = $url;
    }

    /**
     * Thread entry point: fetch the page details and store them in $data.
     * Does nothing when the URL is empty.
     */
    public function run()
    {
        $url = $this->url;
        if ($url) {
            $this->data = PanInfoGet($url);
        }
    }
}
?>
php多線程thread開發與應用的例子