php多線程thread開發與應用的例子

來源:互聯網
上載者:User

PHP擴充下載:https://github.com/krakjoe/pthreads

PHP手冊文檔:http://php.net/manual/zh/book.pthreads.php

在安裝好擴充之後,就可以運用多線程了,下面貼個通過搜尋結果抓取百度網盤內容的代碼:

 代碼如下 複製代碼

<?php
include 'include/CurlLoad.class.php'; // 引入讀取庫
/**
 * Fetch a search-result page and scrape every linked page in parallel.
 *
 * The result links are obtained through BaiduSRLinksGet() with $format = 1.
 * One baidu_thread_run thread is started per link, and the threads are then
 * joined in the order they were started.
 *
 * @param string $url Search-result page URL, passed unchanged to BaiduSRLinksGet().
 * @return array|null On success an array with the keys 'res' (list of the
 *         non-empty per-link results; absent when no thread produced data),
 *         'pages' (pagination map from BaiduSRLinksGet(), empty array when
 *         none was found) and 'status' (the string "1").
 *         NULL when no result-link list could be obtained.
 */
function vget($url) {
 $ret = BaiduSRLinksGet ( $url, 1 ); // fetch the list of result links
 if (! is_array ( $ret ) || ! array_key_exists ( "links", $ret )) {
  return null;
 }
 $infos = array ();
 // Initialised up front so an empty link list does not leave the variable undefined.
 $thread_array = array ();
 foreach ( $ret ['links'] as $i => $link ) { // create and start one worker per link
  $thread_array [$i] = new baidu_thread_run ( $link );
  $thread_array [$i]->start ();
 }
 foreach ( $thread_array as $thread ) {
  // join() itself waits for the worker to finish, so no polling loop is needed.
  if ($thread->join ()) {
   $temp = $thread->data;
   if ($temp != null)
    $infos ['res'] [] = $temp;
  }
 }
 // BaiduSRLinksGet() only sets 'pages' when it found pagination links.
 $infos ['pages'] = isset ( $ret ['pages'] ) ? $ret ['pages'] : array ();
 $infos ['status'] = "1";
 return $infos;
}
/**
 * Extract the result links and pagination links from a search-result page.
 *
 * @param string $url
 *         Search-result page URL.
 * @param int $format
 *         0 (default) returns the links exactly as found in the page;
 *         1 replaces each link by the "Location" header returned for it,
 *         falling back to the original link when no such header is present.
 * @return array|null Array with the key 'links' and, when pagination links
 *         were found, 'pages' (page index => base64-encoded link fragment).
 *         NULL when the page could not be loaded or on an exception.
 */
function BaiduSRLinksGet($url, $format = 0) {
 $html = CurlLoad::HtmlGet ( $url ); // load the page
 if ($html == null)
  return null;
 try {
  // Single-quoted so the literal double quotes in the pattern need no escaping.
  preg_match_all ( '/"url":"(?<links>.*)"}/', $html, $rets ); // pick out the result links
  if (! is_array ( $rets ) || ! array_key_exists ( 'links', $rets )) // no 'links' group means the match failed
   return null;
  $ret = array ();
  if ($format == 1) {
   foreach ( $rets ['links'] as $i => $link ) {
    $headr_temp = CurlLoad::Get_Headers ( $link, 1 ); // resolve the real address from the headers
    if (is_array ( $headr_temp ) && array_key_exists ( "Location", $headr_temp ))
     $ret ['links'] [$i] = $headr_temp ['Location'];
    else
     // Fall back for this entry only; links already resolved are kept.
     $ret ['links'] [$i] = $link;
   }
  } else
   $ret ['links'] = $rets ['links'];
  // '#' delimiters because the pattern itself contains '/'; '?' and '.' are escaped to match literally.
  preg_match_all ( '#href="?/s\?wd=site%3Apan\.baidu\.com%20(?<url>.+?)&ie=utf-8">#', $html, $out );
  $page_urls = (is_array ( $out ) && isset ( $out ['url'] )) ? $out ['url'] : array ();
  unset ( $page_urls [0] );
  // NOTE(review): the count is taken after the first entry is removed, so the
  // loop below also skips the last match. Kept as in the original; confirm intent.
  $number = count ( $page_urls );
  for($i = 1; $i < $number; $i ++) {
   // Capture digits only so trailing query parameters do not end up in the arithmetic.
   if (preg_match ( '/&pn=(\d+)/', $page_urls [$i], $temp ))
    $ret ['pages'] [( int ) ($temp [1] / 10)] = base64_encode ( $page_urls [$i] );
  }
  return $ret;
 } catch ( Exception $e ) {
  WriteLog ( $e );
  return null;
 }
}
/**
 * Load a resource page and pull the file details out of its text.
 *
 * @param string $url
 *         Resource page URL.
 * @return array|null Array with the keys 'name', 'size', 'user', 'date',
 *         'number' (taken from the first match of the pattern) and 'link'
 *         (the given $url). NULL when the page could not be loaded, the
 *         pattern did not match, or an exception was caught.
 */
function PanInfoGet($url) {
 $page = CurlLoad::HtmlGet ( $url ); // load the page
 if ($page == null) {
  return null;
 }
 try {
  // NOTE(review): the labels are matched literally against the page text; confirm they match the live page wording.
  $pattern = "/檔案名稱:(?<name>.*) 檔案大小:(?<size>.*) 分享者:(?<user>.*) 分享時間:(?<date>.*) 下載次數:(?<number>[0-9]+)/";
  $found = preg_match_all ( $pattern, $page, $matches );
  if ($found == 0) {
   return null;
  }
  $info = array ();
  // Copy the first occurrence of every named group, in the fixed field order.
  foreach ( array ('name', 'size', 'user', 'date', 'number') as $field ) {
   $info [$field] = $matches [$field] [0];
  }
  $info ['link'] = $url;
  return $info;
 } catch ( Exception $e ) {
  WriteLog ( $e );
  return null;
 }
}
/**
 * Append a timestamped warning line to ../error.log.
 *
 * Logging is best-effort: when the log file cannot be opened the message is
 * dropped and the function returns without raising an error of its own.
 *
 * @param mixed $str Message to log; it is concatenated into the line, so it
 *         must be convertible to a string.
 * @return void
 */
function WriteLog($str) {
 $file = fopen ( "../error.log", "a+" );
 if ($file === false) {
  // Nothing to write to; do not pass a non-resource to fwrite()/fclose().
  return;
 }
 // Each entry ends with a real CRLF line break so entries do not run together.
 fwrite ( $file, "Warning:" . date ( "Y/m/d H:i:s" ) . ":" . $str . "\r\n" );
 fclose ( $file );
}
/**
 * Worker thread that scrapes a single resource page.
 *
 * The URL given to the constructor is handed to PanInfoGet() when the thread
 * runs, and the return value is stored in $data.
 *
 * @author MuXi
 */
class baidu_thread_run extends Thread {
 // URL this worker will fetch.
 public $url;
 // Result of PanInfoGet(); stays unset when the URL is empty.
 public $data;
 public function __construct($url) {
  $this->url = $url;
 }
 public function run() {
  $target = $this->url;
  if (! $target) {
   // Nothing to fetch for an empty URL.
   return;
  }
  $this->data = PanInfoGet ( $target ); // work done inside the thread
 }
}
?>

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.