使用 curl 批次採集:根據關鍵詞擷取雅虎競價排名_PHP教程

來源:互聯網
上載者:User
之前寫過一篇 curl 批次處理採集資料的文章,這裡貼上完整版本。程式碼很簡單,不多說,直接看程式碼;新手之作,歡迎指教!

程式碼只寫到取得連結為止;至於排名,回傳陣列的鍵就是排名(鍵從 0 起算)。

  <?php
  /**
   * Based on yahoo access to data
   *
   * Requests several Yahoo! JAPAN search result pages for one keyword in
   * parallel (curl_multi) and returns the links found in them, in page order.
   *
   * @author chujiu <527891885@qq.com>
   * @copyright 2014.04.26 By chujiu
   * @version 0.2.1 2014.04.26
   */

  class DataCollectionRank {

      /** Step between the `b` (first-result offset) values of consecutive requests. */
      const   PAGE = 10;

      /** @var string Directory that receives get_rank_log.txt; set by exec_curl_get_data(). */
      public  $path = '';

      /** @var int Largest `b` offset requested; 91 with PAGE = 10 gives offsets 1, 11, ..., 91. */
      public  $main = 91;

      /**
       * @var string Literal text that precedes the part of the page to scan.
       *
       * NOTE(review): the published listing calls explode() with an empty
       * separator here, so the original marker is unknown (presumably an HTML
       * fragment lost when the listing was extracted - TODO confirm). While
       * this stays empty the whole page is scanned.
       */
      public  $section_start = '';

      /**
       * @var string Literal text that ends the part of the page to scan.
       *
       * NOTE(review): same situation as $section_start - confirm the real marker.
       */
      public  $section_end = '';

      /**
       * Creates one curl handle per result page and registers it on a multi handle.
       *
       * @param string $keyword Search keyword.
       * @return array|string '' when the keyword is empty; otherwise
       *                      array('mh' => multi handle,
       *                            'handle' => array(offset => array('ch' => handle, 'url' => string))).
       */
      private function _gather_data($keyword) {
          // empty() alone would also reject the legitimate keyword "0".
          if (empty($keyword) && $keyword !== '0' && $keyword !== 0) {
              return '';
          }

          $mh = curl_multi_init();
          // 'handle' is initialised so callers can iterate even if the loop below runs zero times.
          $chs = array('mh' => $mh, 'handle' => array());
          $query = urlencode($keyword);

          for ($i = 1; $i <= $this->main; $i += self::PAGE) {
              $url = 'http://search.yahoo.co.jp/search?p='.$query.'&tid=top_ga1_sa&ei=UTF-8&aq=-1&oq='.$query.'&pstart=1&fr=top_ga1_sa&b='.$i;
              $ch = curl_init();
              curl_setopt_array($ch, array(
                  CURLOPT_URL => $url,
                  CURLOPT_HEADER => false,
                  // NOTE(review): disables certificate checks; revisit if the URL ever becomes https.
                  CURLOPT_SSL_VERIFYPEER => false,
                  CURLOPT_RETURNTRANSFER => true,
                  CURLOPT_TIMEOUT => 30,
                  CURLOPT_AUTOREFERER => true
              ));
              curl_multi_add_handle($mh, $ch);
              $chs['handle'][$i] = array('ch' => $ch, 'url' => $url);
          }

          return $chs;
      }

      /**
       * Runs all page requests for a keyword and returns the extracted links.
       *
       * Transfer errors are appended to "<path>/get_rank_log.txt".
       *
       * @param string $keyword Search keyword.
       * @param string $path    Directory for the error log.
       * @return array|string '' when the keyword is empty; otherwise a 0-based
       *                      list of URL-decoded links (empty array when none were found).
       */
      public function exec_curl_get_data($keyword, $path) {
          $this->path = $path;
          $chs = $this->_gather_data($keyword);
          if (empty($chs)) {
              return '';
          }
          $mh = $chs['mh'];

          // Drive the transfers; wait on socket activity instead of spinning the CPU.
          $active = null;
          do {
              $status = curl_multi_exec($mh, $active);
              if ($active && curl_multi_select($mh, 1.0) === -1) {
                  // select() can fail immediately on some platforms; back off briefly.
                  usleep(100000);
              }
          } while ($active && ($status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM));

          $error = '';
          $links = array();
          foreach ($chs['handle'] as $ch) {
              $message = curl_error($ch['ch']);
              if ($message !== '') {
                  // Timestamp is year-month-day (the original format string had day and month swapped).
                  $error .= "\n".'error提示:'.$message.'-------URL:'.$ch['url'].'--------時間:'.date('Y-m-d H:i:s')."\n";
              } else {
                  $body = curl_multi_getcontent($ch['ch']);
                  if (!empty($body)) {
                      // Links are collected as a list rather than joined with '|' and split
                      // again, so a link that itself contains '|' stays intact.
                      foreach ($this->_get_keyword_link_preg($body) as $link) {
                          $links[] = $link;
                      }
                  }
              }
              curl_multi_remove_handle($mh, $ch['ch']);
              curl_close($ch['ch']);
          }
          curl_multi_close($mh);

          if ($error !== '') {
              $this->_writeFile('get_rank_log.txt', $error, 'ab+');
          }

          return $links;
      }

      /**
       * Extracts the href targets from one result page.
       *
       * When $section_start / $section_end are set, only the text after the first
       * start marker (up to the next start marker) and before the end marker is
       * scanned; when the start marker is configured but absent, nothing is returned.
       *
       * NOTE(review): the published listing also shows three preg_replace() calls
       * whose patterns, as printed, match the entire input and would erase every
       * link. They are left out until the intended patterns are known.
       *
       * @param string $str Page HTML.
       * @return array List of URL-decoded href values, in document order.
       */
      private function _get_keyword_link_preg ($str) {
          if (empty($str)) {
              return array();
          }

          $html = $str;
          if ($this->section_start !== '') {
              $parts = explode($this->section_start, $html);
              if (!isset($parts[1])) {
                  return array();
              }
              $html = $parts[1];
          }
          if ($this->section_end !== '') {
              $parts = explode($this->section_end, $html);
              $html = $parts[0];
          }

          $links = array();
          // Only matches anchors whose href is the last attribute before '>'.
          if (preg_match_all('#href=\"(.*?)\">#', $html, $matches)) {
              foreach ($matches[1] as $href) {
                  $links[] = urldecode($href);
              }
          }
          return $links;
      }

      /**
       * Writes data to "<path>/<fileName>".
       *
       * @param string $fileName File name relative to $this->path.
       * @param string $data     Bytes to write.
       * @param string $method   fopen() mode; with "rb+" the file is cut to strlen($data) after writing.
       * @param int    $iflock   Non-zero to take an exclusive lock while writing.
       * @param int    $check    Non-zero to terminate the script when the target path contains "..".
       * @param int    $chmod    Non-zero to chmod the file to 0777 afterwards.
       * @return bool True when the data was written, false when the file could not be opened or written.
       */
      public function _writeFile($fileName, $data, $method="rb+", $iflock=1, $check=1, $chmod=1){
          $file = $this->path.'/'.$fileName;

          // Kept from the original: a path containing ".." ends the whole script.
          if ($check && strpos($file, '..') !== false) {
              exit('403 Forbidden!');
          }

          @touch($file);
          $handle = @fopen($file, $method);
          if ($handle === false) {
              // Previously the false handle was passed on to flock()/fwrite()/fclose().
              return false;
          }

          if ($iflock) {
              flock($handle, LOCK_EX);
          }
          $written = fwrite($handle, $data);
          if ($method == "rb+") {
              ftruncate($handle, strlen($data));
          }
          fclose($handle); // also releases the lock

          if ($chmod) {
              @chmod($file, 0777);
          }
          return $written !== false;
      }
  }
  ?>

 /**
  * Removes duplicate rows from a list of (keyword, domain) rows.
  *
  * Rows are compared by the string form of all their cells. Each distinct row
  * appears once in the result, in order of first appearance, keyed by the
  * 0-based position of its LAST occurrence in the input (the same keys the
  * earlier array_flip(array_flip()) implementation produced).
  *
  * Rows are compared cell by cell instead of being joined with "," and split
  * again, so a keyword or domain that contains a comma is kept intact.
  *
  * @param array $array List of rows; cell 0 is the keyword, cell 1 the domain.
  * @return array array(position => array('keyword' => string, 'domain' => string|null));
  *               'domain' is null when the row has no second cell.
  */
 function array_unique_fb($array){
     $lastPosition = array(); // row signature => position of its last occurrence
     $cellsBySignature = array(); // row signature => string cells of that row
     $position = 0;

     foreach ($array as $row) {
         $cells = array_map('strval', array_values((array) $row));
         if ($cells === array()) {
             // An empty row is treated like a row holding one empty string.
             $cells = array('');
         }
         $signature = serialize($cells);
         $lastPosition[$signature] = $position++;
         $cellsBySignature[$signature] = $cells;
     }

     $data = array();
     foreach ($lastPosition as $signature => $index) {
         $cells = $cellsBySignature[$signature];
         $data[$index] = array(
             'keyword' => $cells[0],
             'domain' => isset($cells[1]) ? $cells[1] : null,
         );
     }
     return $data;
 }

原文出處:http://www.bkjia.com/PHPjc/770661.html(www.bkjia.com)|TechArticle 摘要:之前寫過curl批處理採集資料,這裡貼上完整版本,代碼很簡單,廢話不說,上代碼,新手歡迎指教!!! 代碼只寫到取得連結為止,至於...

  • 聯繫我們

    該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

    如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

    A Free Trial That Lets You Build Big!

    Start building with 50+ products and up to 12 months usage for Elastic Compute Service

    • Sales Support

      1 on 1 presale consultation

    • After-Sales Support

      24/7 Technical Support 6 Free Tickets per Quarter Faster Response

    • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.