Goutte怎麼擷取a標籤裡面的url,或者好用的PHP爬蟲庫,謝謝

來源:互聯網
上載者:User
Goutte怎麼擷取a標籤裡面的url?或者好用的PHP爬蟲庫,謝謝

 [],        'link'    => [],        'content' => [],        'source'  => [],        'date'    => [],    ];    public function __construct()    {        try {            $this->_client  = new Client();            $this->_crawler = $this->_client->request('GET', 'http://www.ningshan.gov.cn/Category_90/Index.aspx');            // $client->getClient()->setDefaultOption('config/curl/'.CURLOPT_TIMEOUT, 10);        } catch (Exception $e) {            throw new \Exception($e->getMessage(), 1);        }    }    public function getDate()    {        $this->_crawler->filter('div#list>ul>li>span')->each(function ($node) {           $this->_news['date'][] = $node->text();        });    }    public function getTitle()    {        $link = $this->_crawler->selectLink('寧陝縣召開政協八屆二十二次次常委會')->link();        var_dump($link->getUri);die;        $this->_crawler->filter('div#list>ul>li>a')->each(function ($node) {           if ($node->text() !== '寧陝要聞') {                $this->_news['title'][]  = $node->text();                $this->_news['link'][]   = $node->link();                $this->_news['source'][] = '寧陝要聞';           }        });    }}//-----------------------------------try {    $spider = new Spider();    $spider->getDate();    $spider->getTitle();    echo json_encode($spider->_news, JSON_UNESCAPED_UNICODE);} catch (Exception $e) {    echo $e->getMessage();}

回覆內容:

Goutte怎麼擷取a標籤裡面的url?或者好用的PHP爬蟲庫,謝謝

 [],        'link'    => [],        'content' => [],        'source'  => [],        'date'    => [],    ];    public function __construct()    {        try {            $this->_client  = new Client();            $this->_crawler = $this->_client->request('GET', 'http://www.ningshan.gov.cn/Category_90/Index.aspx');            // $client->getClient()->setDefaultOption('config/curl/'.CURLOPT_TIMEOUT, 10);        } catch (Exception $e) {            throw new \Exception($e->getMessage(), 1);        }    }    public function getDate()    {        $this->_crawler->filter('div#list>ul>li>span')->each(function ($node) {           $this->_news['date'][] = $node->text();        });    }    public function getTitle()    {        $link = $this->_crawler->selectLink('寧陝縣召開政協八屆二十二次次常委會')->link();        var_dump($link->getUri);die;        $this->_crawler->filter('div#list>ul>li>a')->each(function ($node) {           if ($node->text() !== '寧陝要聞') {                $this->_news['title'][]  = $node->text();                $this->_news['link'][]   = $node->link();                $this->_news['source'][] = '寧陝要聞';           }        });    }}//-----------------------------------try {    $spider = new Spider();    $spider->getDate();    $spider->getTitle();    echo json_encode($spider->_news, JSON_UNESCAPED_UNICODE);} catch (Exception $e) {    echo $e->getMessage();}

現找的

// Request the blog index page. $client is not created in this snippet;
// it is assumed to be an already-constructed crawler client — confirm against the caller.
$crawler = $client->request('GET', 'http://www.symfony.com/blog/');

// Select the anchor whose text is "Security Advisories" and obtain its link object.
$link = $crawler
    ->selectLink('Security Advisories')
    ->link();

// Print the URI reported by the link object (note: getUri() is a method call).
print_r($link->getUri());

手冊:http://symfony.com/doc/curren...
GIT:https://github.com/FriendsOfP...

採集類參考:http://flc.ren/2016/06/528.html

  • 相關文章

    聯繫我們

    該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關。如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件後我們將在5個工作日內處理。

    如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

    A Free Trial That Lets You Build Big!

    Start building with 50+ products and up to 12 months usage for Elastic Compute Service

    • Sales Support

      1 on 1 presale consultation

    • After-Sales Support

      24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

    • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.