I am still a beginner, and this is the first time I am sharing code. This is a PHP scraping (collection) class I wrote some time ago and have been using ever since. I find it simple yet powerful: with only a little knowledge of selectors you can scrape any page, and it also supports HTTPS pages. It is more than enough for simple scraping jobs.
<?php
/**
 * Universal List Collection Class
 * Version V1.3
 * Author: JAE
 * Blog: http://blog.jaekj.com/
 */

// NOTE(review): the letter case of this path was lost in the published
// listing; confirm it against the local phpQuery installation.
require_once './phpQuery/phpQuery/phpQuery.php';

/**
 * Downloads a page with cURL and extracts a list of values from it using
 * phpQuery (CSS-style) selectors.
 *
 * The extracted rows are exposed through the public $jsonArr property and,
 * JSON-encoded, through getJSON().
 */
class QueryList
{
    /** Address of the page that was downloaded. */
    private $pageURL;

    /** Selector rules: array("name" => array("selector", "type"), ...). */
    private $regArr = array();

    /** Extracted rows, indexed 0..n-1; each row maps rule name => value. */
    public $jsonArr = array();

    /** Optional block selector; when set, rules are applied inside each block. */
    private $regRange;

    /** Raw response body returned by curl_exec() (false on failure). */
    private $html;

    /**
     * Downloads the page and, when rules are given, runs them immediately.
     *
     * @param string $pageURL  Page address.
     * @param array  $regArr   Selector array, in the form
     *                         array("name" => array("selector", "type"), ...).
     *                         "type" is "text", "html", or an attribute name.
     * @param string $regRange Block selector: first selects several blocks,
     *                         then every rule is applied inside each block.
     */
    public function __construct($pageURL, $regArr = array(), $regRange = '')
    {
        $this->pageURL = $pageURL;

        // cURL is used so that https:// pages can be fetched as well.
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->pageURL);
        // SECURITY NOTE: certificate and host verification are switched off,
        // exactly as in the original listing, so HTTPS responses are not
        // authenticated. Enable both if the scraped data must be trusted.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $this->html = curl_exec($ch);
        curl_close($ch);

        if (!empty($regArr)) {
            $this->regArr = $regArr;
            $this->regRange = $regRange;
            $this->getList();
        }
    }

    /**
     * Runs a new set of rules against the page that was already downloaded.
     * Previous results in $jsonArr are discarded.
     *
     * @param array  $regArr   Selector array (same format as the constructor).
     * @param string $regRange Optional block selector.
     */
    public function setQuery($regArr, $regRange = '')
    {
        $this->jsonArr = array();
        $this->regArr = $regArr;
        $this->regRange = $regRange;
        $this->getList();
    }

    /**
     * Applies the current rules to the downloaded HTML and fills $jsonArr.
     */
    private function getList()
    {
        // curl_exec() returns false on failure; there is nothing to parse then.
        if (!is_string($this->html) || $this->html === '') {
            return;
        }

        $hobj = phpQuery::newDocumentHTML($this->html);

        if (!empty($this->regRange)) {
            // Block mode: one result row per matched block.
            $i = 0;
            foreach (pq($hobj)->find($this->regRange) as $item) {
                // foreach replaces the original each()/reset() pair;
                // each() no longer exists in PHP 8.
                foreach ($this->regArr as $key => $reg_value) {
                    $iobj = pq($item)->find($reg_value[0]);
                    $this->jsonArr[$i][$key] = $this->extractValue($iobj, $reg_value[1]);
                }
                $i++;
            }
            return;
        }

        // Flat mode: every rule is matched against the whole document and the
        // n-th match of each rule is stored in the n-th row.
        foreach ($this->regArr as $key => $reg_value) {
            $i = 0;
            foreach (pq($hobj)->find($reg_value[0]) as $item) {
                $this->jsonArr[$i++][$key] = $this->extractValue($item, $reg_value[1]);
            }
        }
    }

    /**
     * Reads one value from a matched node.
     *
     * @param mixed  $node Node or phpQuery object to read from.
     * @param string $type "text", "html", or the name of an attribute.
     * @return mixed Trimmed text/HTML, or the attribute value.
     */
    private function extractValue($node, $type)
    {
        switch ($type) {
            case 'text':
                return trim(pq($node)->text());
            case 'html':
                return trim(pq($node)->html());
            default:
                return pq($node)->attr($type);
        }
    }

    /**
     * @return string The current result rows encoded as JSON.
     */
    public function getJSON()
    {
        return json_encode($this->jsonArr);
    }
}
<?php
// Usage example for the QueryList class.
// NOTE(review): the letter case of this path was lost in the published
// listing; confirm it against the actual file name on disk.
require 'query/querylist.class.php';

// Collect the code-share list of OSC: title, link, author.
$url = "http://www.oschina.net/code/list";
$reg = array(
    "title"  => array(".code_title a:eq(0)", "text"),
    "url"    => array(".code_title a:eq(0)", "href"),
    "author" => array("img", "title"),
);
$rang = ".code_list li";
$hj = new QueryList($url, $reg, $rang);
$arr = $hj->jsonArr;
print_r($arr);

// If you also want to collect the TOP40 active contributor images on the
// right side of the current page and get the result as JSON, you can write:
$reg = array("portrait" => array(".hot_top img", "src"));
$hj->setQuery($reg);
$json = $hj->getJSON();
// The string literal that followed $json was truncated in the published
// listing; a newline is emitted in its place.
echo $json . "\n";
<?php
/**
 * Home-made Baidu and Google search API
 * Version V2.0
 * Author: JAE
 * Blog: http://blog.jaekj.com
 **/

// NOTE(review): the letter case of this path was lost in the published
// listing; confirm it against the actual file name on disk.
require_once 'querylist_class.php';

/**
 * Scrapes one page of Baidu or Google search results through QueryList and
 * exposes them, with paging information, in $jsonArr / getJSON().
 */
class Searcher
{
    /** Engine name: 'baidu' or 'google'. */
    private $searcher;

    /** Search keyword. */
    private $key;

    /** Number of results per page. */
    private $num;

    /** Zero-based page index (the constructor receives a one-based page). */
    private $page;

    /** QueryList rules for one result entry (title, tCon, url). */
    private $regArr;

    /** QueryList block selector matching one result entry. */
    private $regRange;

    /** QueryList rule for the element holding the total result count. */
    private $regZnum;

    /**
     * Final result: array with keys num, page, zNum, zPage, s, other, data.
     */
    public $jsonArr;

    /**
     * Runs the search immediately.
     *
     * @param string $searcher Search engine: 'baidu' or 'google'.
     * @param string $key      Search keyword.
     * @param int    $num      Number of results to return per page.
     * @param int    $page     One-based page number.
     * @throws InvalidArgumentException When the engine is not supported
     *         (the original code would go on with undefined variables).
     */
    public function __construct($searcher, $key, $num, $page)
    {
        if ($searcher === 'baidu') {
            $this->regArr = array(
                "title" => array("h3.t a,#ting_singlesong_box a", "text"),
                "tCon"  => array("div.c-abstract,font:slice(0,2),div#weibo,table tr:eq(0),div.c-abstract-size p:eq(0),div.vd_sitcom_new_tinfo", "text"),
                "url"   => array("h3.t a,#ting_singlesong_box a", "href"),
            );
            $this->regRange = 'table.result,table.result-op';
            $this->regZnum = array("zNum" => array("span.nums", "text"));
        } elseif ($searcher === 'google') {
            $this->regArr = array(
                "title" => array("h3.r a", "text"),
                "tCon"  => array("span.st", "text"),
                "url"   => array("h3.r a", "href"),
            );
            $this->regRange = 'li.g';
            $this->regZnum = array("zNum" => array("div#resultStats", "text"));
        } else {
            throw new InvalidArgumentException('Unsupported search engine: ' . $searcher);
        }

        $this->searcher = $searcher;
        $this->key = $key;
        $this->num = $num;
        $this->page = $page - 1;
        $this->getList();
    }

    /**
     * Fetches the result page, resolves each result's real URL, reads the
     * total result count and assembles $jsonArr.
     */
    private function getList()
    {
        $s = urlencode($this->key);
        $num = $this->num;
        $start = $this->num * $this->page;

        // The constructor only accepts 'baidu' and 'google'.
        if ($this->searcher === 'baidu') {
            $url = "http://www.baidu.com/s?pn=$start&rn=$num&wd=$s";
            $reg_znum = '/[\d,]+/';
        } else {
            $url = "https://www.google.com.hk/search?filter=0&lr=&newwindow=1&safe=images&hl=en&as_qdr=all&num=$num&start=$start&q=$s";
            $reg_znum = '/([\d,]+) result(s)?/';
        }

        $searcherObj = new QueryList($url, $this->regArr, $this->regRange);

        // Replace each scraped link with the address it really points to.
        foreach ($searcherObj->jsonArr as $i => $row) {
            $link = isset($row['url']) ? $row['url'] : null;
            $searcherObj->jsonArr[$i]['url'] = ($this->searcher === 'baidu')
                ? $this->getBaiduRealURL($link)
                : $this->getGoogleRealURL($link);
        }
        $this->jsonArr = $searcherObj->jsonArr;

        // Obtain the total number of results.
        $searcherObj->setQuery($this->regZnum);
        $zNum = 0;
        if (isset($searcherObj->jsonArr[0]['zNum'])
            && preg_match($reg_znum, $searcherObj->jsonArr[0]['zNum'], $arr)
        ) {
            // $arr[0] may carry trailing text (e.g. " results"); the integer
            // cast keeps only the leading digits once the commas are removed.
            $zNum = (int) str_replace(',', '', $arr[0]);
        }

        // Calculate the total number of pages; avoid dividing by zero.
        $zPage = ($this->num > 0) ? (int) ceil($zNum / $this->num) : 0;

        $this->jsonArr = array(
            'num'   => $this->num,
            'page'  => ((int) $this->page + 1),
            'zNum'  => $zNum,
            'zPage' => $zPage,
            "s"     => "$this->key",
            'other' => array('author' => 'JAE', 'QQ' => '734708094', 'blog' => 'http://blog.jaekj.com'),
            'data'  => $this->jsonArr,
        );
    }

    /**
     * @return string The assembled result encoded as JSON.
     */
    public function getJSON()
    {
        return json_encode($this->jsonArr);
    }

    /**
     * Gets the real address behind a Baidu jump (redirect) link.
     *
     * @param mixed $url Link scraped from the result page.
     * @return mixed The first Location header when the link answers with
     *               301/302; otherwise $url unchanged.
     */
    private function getBaiduRealURL($url)
    {
        if (!is_string($url) || $url === '') {
            return $url;
        }

        $header = get_headers($url, 1);
        if ($header === false || !isset($header[0], $header['Location'])) {
            return $url;
        }

        $isRedirect = strpos($header[0], '301') !== false
            || strpos($header[0], '302') !== false;
        if (!$isRedirect) {
            return $url;
        }

        // With several redirects in a row the header is an array; the first
        // hop is used (the original left "last hop" as a commented-out option).
        if (is_array($header['Location'])) {
            return $header['Location'][0];
        }

        return $header['Location'];
    }

    /**
     * Extracts the target address from the "q=" parameter of a Google
     * result link.
     *
     * @param mixed $url Link scraped from the result page.
     * @return mixed The decoded target, or $url unchanged when no "q=...&"
     *               part is present.
     */
    private function getGoogleRealURL($url)
    {
        if (!is_string($url)) {
            return $url;
        }

        // Lazy match: stop at the first "&" after "q=".
        $reg_url = '/q=(.+?)&/';

        return preg_match($reg_url, $url, $arr) ? urldecode($arr[1]) : $url;
    }
}

// $hj = new Searcher('google', 'oschina', 20, 2);
// print_r($hj->jsonArr);
// Effect demo address:
// http://blog.jaekj.com/jae/demo/searcher/searcher_class.php?searcher=baidu&s=jaekj&num=20&page=1