/**
 * Fetch the content of a web page (or any fopen()-readable stream) by URL.
 *
 * The bytes are returned exactly as read. The original code carried a
 * disabled call to utf8_iconv() on the result; callers that need a
 * GBK -> UTF-8 conversion must apply it themselves.
 *
 * @param string $url Address of the resource to read
 * @return string The content read, or an empty string when the resource
 *                cannot be opened or read
 */
private function fetchbyurl($url)
{
    $handle = fopen($url, 'r');

    // fopen() returns false on failure; looping on feof(false) never ends
    // (PHP 7) or raises a TypeError (PHP 8), so bail out early instead.
    if ($handle === false) {
        return '';
    }

    // stream_get_contents() reads until EOF and stops on read errors,
    // unlike a feof()/fgets() loop which can spin on a stalled stream.
    $content = stream_get_contents($handle);

    // Always release the stream handle.
    fclose($handle);

    return $content === false ? '' : $content;
}

/**
 * Convert a GBK-encoded string to UTF-8.
 *
 * @param string $content GBK-encoded text
 * @return string|false The converted text, or whatever iconv() returns on failure
 */
private function utf8_iconv($content)
{
    return iconv('GBK', 'UTF-8', $content);
}

/**
 * Extract every piece of text that follows a start marker, cut at the
 * next end marker.
 *
 * A segment that follows a start marker but contains no end marker is
 * returned in full (up to the next start marker or the end of the input).
 *
 * @param string $str   Text to search
 * @param string $start Start marker
 * @param string $end   End marker
 * @return array List of extracted strings, in order of appearance; empty
 *               when either marker is an empty string
 */
private function strCutAll($str, $start, $end)
{
    $matchs = array();

    // explode() does not accept an empty delimiter.
    if ((string) $start === '' || (string) $end === '') {
        return $matchs;
    }

    $segments = explode($start, $str);

    // The first segment is the text before the first start marker.
    array_shift($segments);

    foreach ($segments as $segment) {
        // Only the part before the first end marker is wanted.
        $parts = explode($end, $segment, 2);
        $matchs[] = $parts[0];
    }

    return $matchs;
}

/**
 * Extract the text between the first start marker and the first end
 * marker that follows it.
 *
 * An empty $start matches at the beginning of $str.
 *
 * @param string $str   Text to search
 * @param string $start Start marker
 * @param string $end   End marker
 * @return string The extracted text, or an empty string when no complete
 *                start/end pair is found
 */
private function strCut($str, $start, $end)
{
    if ((string) $end === '') {
        return '';
    }

    $from = 0;
    if ((string) $start !== '') {
        $from = strpos($str, $start);
        if ($from === false) {
            return '';
        }
        $from += strlen($start);
    }

    // Look for the end marker only after the start marker, so an end
    // marker that also occurs inside the start marker is not picked up.
    $to = strpos($str, $end, $from);
    if ($to === false) {
        return '';
    }

    return substr($str, $from, $to - $from);
}

// Test / usage example (scraper), to be run from inside a method of this class.
// dump() is not defined in this snippet; presumably a framework debug helper.
//
//     header("content-Type: text/html; charset=utf-8");
//     // $nr = file_get_contents('/webback/php/php-yi-ju-hua-hou-men-zhuan');
//     // Recommended; curl can be used as well.
//     $nr = $this->fetchbyurl('/webback/php/php-yi-ju-hua-hou-men-zhuan');
//     // Gets the content; use preg_match_all() for further filtering.
//     dump($this->strCut($nr, '<div class="context">', '<div class="betterrelated">'));
//     // Gets the title.
//     dump($this->strCutAll($nr, '<title>', '</title>'));